Figure 5 and supplementary figure 5

Author

Niccolò Arecco, Ivano Mocavini and Enrique Blanco

Last code execution: 2024 February 07, Wednesday @ 21:02:47.

1 Intro

ChIP-seq analysis.

To fetch some of the inclusion tables (PSI data) stored on the CRG cluster I mount the server on my local computer with the following command in the terminal.

Code
sshfs narecco@ant-login.linux.crg.es:/users/mirimia ~/mnt

which prompts my password and then allows me to navigate the cluster directories.

2 Set Up

General info.

2.1 Packages

Load packages required for the analysis and suppress any message. Check the Section 5 section at the end for package versions.

Code
library(dplyr, warn.conflicts = F, quietly = T)
library(readr)
library(readxl)
library(stringr)
library(tidyr)
library(tibble)
library(ggplot2)
library(ggforce)
library(ggsignif)
library(ggh4x)
library(ggrastr) # for rasterizing only the geom_point layer in a plot with many data-points
library(rstatix)
library(janitor)
library(DT)
library(BioVenn) # Venn Diagram
library(caroline) # Spie chart
library(patchwork)

2.2 Functions

Define a plot style.

Code
theme_classic(base_family = 'Arial', base_size = 6) +
  theme(axis.text = element_text(colour = 'black'),
        axis.ticks.length = unit(1, 'mm'),
        axis.ticks = element_line(linewidth = 0.1),
        axis.line = element_line(linewidth = 0.1),
        axis.title = element_text(size = 5),
        panel.grid.major = element_line(linewidth = 0.2, colour = 'gray84'),
        strip.text = element_text(size = 10),
        strip.background = element_blank(),
        legend.title = element_blank(), 
        legend.key.size = unit(1, 'mm'),
        legend.position = c(0.21, 0.92),
        legend.box.background = element_blank(),
        legend.margin = margin(t = -1, b = 1, unit = 'mm'),
        plot.background = element_blank(),
        panel.background = element_blank()) -> scatter_th

theme_classic(base_family = 'Arial', base_size = 6) +
  theme(axis.text = element_text(colour = 'black'),
        axis.text.x = element_text(angle = 45, hjust = 1, margin = margin(t = -0.4, unit = 'mm') ),
        axis.title.x = element_blank(),
        axis.ticks.length.y = unit(1, 'mm'),
        axis.ticks = element_line(linewidth = 0.1),
        axis.ticks.x = element_blank(),
        axis.line = element_line(linewidth = 0.1),
        axis.title = element_text(size = 5),
        panel.grid.major.y = element_line(linewidth = 0.2, colour = 'gray84'),
        panel.background = element_blank(),
        panel.spacing = unit(3, 'mm'),
        strip.text = element_text(size = 5),
        strip.background = element_rect(linewidth = 0.1),
        plot.background = element_blank() ) -> boxplot_th

theme_classic(base_family = 'Arial', base_size = 6) +
  theme(axis.text = element_text(colour = 'black'),
        axis.text.x = element_text(margin = margin(t = -0.3, unit = 'mm') ),
        axis.ticks.length.y = unit(1, 'mm'),
        axis.ticks = element_line(linewidth = 0.1),
        axis.ticks.x = element_blank(),
        axis.line = element_line(linewidth = 0.1),
        axis.title = element_text(size = 5),
        panel.grid.major = element_line(linewidth = 0.2, colour = 'gray84'),
        panel.background = element_blank(),
        panel.spacing = unit(3, 'mm'),
        legend.position = c(0.93, 0.87),
        legend.key.size = unit(1, 'mm'),
        legend.title = element_blank(),
        legend.box.background = element_blank(),
        legend.margin = margin(t = -1, b = 1, unit = 'mm'),
        strip.text = element_text(size = 5),
        strip.background = element_rect(linewidth = 0.1),
        plot.background = element_blank() ) -> profile_th

Function import GENEprofile files from SeqCode

Code
# read "GENEprofile" file types from SeqCode
read_prfl <- function(path_dir, txt_name, epitope, geneset_type, sample_name) {
  df_in <- read_delim(file = file.path(path_dir, txt_name), 
                      delim = '\t', show_col_types = FALSE, col_names = F ) |>
    setNames( c("MetaPosition", paste0(epitope, "_signal") ) ) |>
    mutate(type = geneset_type, Sample = sample_name ) 
  return(df_in)
}

2.3 Directories & File Paths

Here I organise all the file and directories- paths I need to run the analysis and define where to save the processed tables and figures.

Code
oneDrive_Dir <- file.path("~/OneDrive - CRG - Centre de Regulacio Genomica/Suz12_AS_project")  
code_dir_fig <- file.path(oneDrive_Dir, "_Code/Fig5")
tbl_dir_fig5 <- file.path(code_dir_fig, "tables")
pdf_dir_fig5 <- file.path(code_dir_fig, "pdfs")

if (!dir.exists(pdf_dir_fig5)) { dir.create(pdf_dir_fig5, recursive = T) }
if (!dir.exists(tbl_dir_fig5)) { dir.create(tbl_dir_fig5, recursive = T) }

Path to DropBox folders where Enrique releases the documents.

Code
dropbox_dir <- file.path('~/Dropbox (CRG ADV)/54_Suz12AS')

round2_h3k27me3_peaks_dir <- file.path(dropbox_dir, 'ROUND2_H3K27me3_ChIP/GENESETS/MACS1')
stopifnot(dir.exists(round2_h3k27me3_peaks_dir))
round3_suz12_peaks_dir <- file.path(dropbox_dir, 'ROUND3 - SUZ12 ChIP/files/genesets')
stopifnot(dir.exists(round3_suz12_peaks_dir))

round17_meta_dir <- file.path(dropbox_dir, 'ROUND17_Figure3/01_files_for_boxplotofmetaplots')
round17_scatter_dir <- file.path(dropbox_dir, 'ROUND17_Figure3/02_files_for_scattersboxplots')
stopifnot(dir.exists(round17_meta_dir))
stopifnot(dir.exists(round17_scatter_dir))

round21_meta_dir <- file.path(dropbox_dir, 'ROUND21_PcGSubUnitChIPseq/METAPLOTS/GENEPLOTS')
stopifnot(dir.exists(round21_meta_dir))
round23_scatter_dir <- file.path(dropbox_dir, 'ROUND23_FinalFiguresRevision/SCATTERPLOTS')
stopifnot(dir.exists(round23_scatter_dir))
round23_metagene_dir <- file.path(dropbox_dir, 'ROUND23_FinalFiguresRevision/METAPLOTS')
stopifnot(dir.exists(round23_metagene_dir))

List all bed files to be used for the metagene profiles

Code
bed_paths <- list.files(round17_meta_dir, recursive = T, pattern = "*.bed$")

Path where the data to plot meta genes profiles for H3K27me3 are stored.

Code
CGI_Dir <- file.path(oneDrive_Dir, '11_ChIP/Processed_data/Metaplots_CGI')
stopifnot(dir.exists(CGI_Dir))

3 Main Figure Panels

3.1 Spike-in normalised H3K27me3 signal in 2.5kb genomic bins in endogenous cell lines

Import all 2.5 kb bins of H3K27me3.

Code
all_bins25kb_K27me3 <- read_delim(file = file.path(round17_scatter_dir, 'H3K27me3_R3all_bins_2.5Kb_yes.txt'),
                                  delim = '\t', progress = F,
                                  col_names = c('bin', "WT", 'CSex4', 'dex4', 'KO'),
                                  show_col_types = FALSE) |>
  mutate(chr = str_split_fixed(pattern = "\\*", string = bin, n = 3)[,1],
         start = str_split_fixed(pattern = "\\*", string = bin, n = 3)[,2],
         end = str_split_fixed(pattern = "\\*", string = bin, n = 3)[,3]) |>
  relocate(chr, start, end, .after = bin) 

Import the annotated bins that belong to SUZ12 targeted CGI and those that contain a CGI that is not target

Code
bins25kbK27me3_CGI_SUZ12 <- read_delim(
  file = file.path(round17_scatter_dir, 'H3K27me3_R3all_bins_2.5Kb_yes_CGI-SUZ12.txt'),
  delim = c(' '), col_names = c('bin', "garbage"), show_col_types = FALSE
  )

bins25kbK27me3_other_CGI <- read_delim(
  file = file.path(round17_scatter_dir, 'H3K27me3_R3all_bins_2.5Kb_yes_CGI-NOPCG.txt'), 
  delim = c(' '), col_names = c('bin', "garbage"), show_col_types = FALSE
  )

Check how many of the total bins are in each set (SUZ12 targeted CGIs or others CGIs).

Code
table(all_bins25kb_K27me3$bin %in% bins25kbK27me3_CGI_SUZ12$bin)

 FALSE   TRUE 
990222   4189 
Code
table(all_bins25kb_K27me3$bin %in% bins25kbK27me3_other_CGI$bin)

 FALSE   TRUE 
981600  12811 

Map the total bins in each set

Code
all_bins25kb_K27me3 |>
  mutate(type = case_when(bin %in% bins25kbK27me3_CGI_SUZ12$bin ~ "PcG targeted CGI",
                          bin %in% bins25kbK27me3_other_CGI$bin ~ "Other CGI") ) |>
  mutate(type = ifelse(is.na(type), yes = "Rest of the genome", no = type )
         ) -> all_bins25kb_K27me3

Set bin type as factor.

Code
all_bins25kb_K27me3$type <- factor(all_bins25kb_K27me3$type,
                                   levels = c("PcG targeted CGI", "Other CGI", "Rest of the genome") )

Overview of bins file.

Code
datatable(head(all_bins25kb_K27me3, 150), rownames = FALSE, filter = 'top', 
          options = list(pageLength = 5, autoWidth = TRUE) )

Define the flanking bins (-1 and +1 bins) surrounding CGI.

Code
indx_targeted <- which(all_bins25kb_K27me3$type == "PcG targeted CGI")
message(length(indx_targeted), ' PcG targeted bins')
4189 PcG targeted bins

Get first the +1 bins:

If the difference in index number between the targeted bin and the + 1 is 1, meaning the 2 PcG targeted CGI are one next to each other, exclude the +1 bins. Otherwise, label every +1 index number as downstream flanking PcG targeted CGI.

Code
plus_one_targeted <- c()
for (i in 1:(length(indx_targeted)-1) ) {
  
  if ( (indx_targeted[i + 1] - indx_targeted[i]) == 1 ) {
    # Do no nothing
    plus_one_targeted[i] <- 0
    
  } else {
    plus_one_targeted[i] <- indx_targeted[i] + 1
  }
}

Here’s an example

Code
message('Targeted indexes:\t', paste0(head(indx_targeted, 7), sep = ' ') )
Targeted indexes:   983 994 2042 2295 2647 2648 2884 
Code
message('Flanking +1 indexes:\t', paste0(head(plus_one_targeted, 7), sep = ' ') )
Flanking +1 indexes:    984 995 2043 2296 0 2649 2885 

Since bins numbered 2647 and 2648 are next to each other, the bin numbered 2649 is selected and there’s a 0 for the 2648. By removing the zeros I remove the consecutive CGI bins.

Code
plus_one_targeted <- plus_one_targeted[plus_one_targeted != 0]

Get the -1 bins: here the logic is the same as above, but inverted. If the difference in index number between the targeted bin and the -1 bin is 1, meaning the 2 PcG targeted CGI are one next to each other, exclude the -1 bins.

Code
minus_one_targeted <- c()
for (i in 2:(length(indx_targeted)) ) {
  
  if ( ( indx_targeted[i] - indx_targeted[i - 1] ) == 1 ) {
    # Do no nothing
    minus_one_targeted[i] <- 0
    
  } else {
    minus_one_targeted[i] <- indx_targeted[i] - 1
  }
}

Remove first NA, and consecutive bins defined by zeros.

Code
minus_one_targeted[which(  is.na(minus_one_targeted) )] <- indx_targeted[1] - 1
minus_one_targeted <- minus_one_targeted[minus_one_targeted != 0]

Check binning

Code
message('Targeted indexes:\t', paste0(head(indx_targeted, 7), sep = ' ') )
Targeted indexes:   983 994 2042 2295 2647 2648 2884 
Code
message('Flanking -1 indexes:\t', paste0(head(minus_one_targeted, 7), sep = ' ') )
Flanking -1 indexes:    982 993 2041 2294 2646 2883 4248 

Since bins numbered 2647 and 2648 are next to each other, the bin numbered 2646 is selected and there’s a 0 for the 2647. By removing the zeros I remove the consecutive CGI bins.

Check that there’s no overlap between the targeted and the flanking regions.

Code
table(plus_one_targeted %in% indx_targeted)

FALSE 
 2890 
Code
table(minus_one_targeted %in% indx_targeted)

FALSE 
 2891 

Check that some flanking bins can be both upstream or downstream.

Code
table(minus_one_targeted %in% plus_one_targeted)

FALSE  TRUE 
 2694   197 

Create a new bin type variable called type2.

Code
all_bins25kb_K27me3$type2  <- NA
all_bins25kb_K27me3[ indx_targeted, ]$type2 <- "PcG targeted CGI"
all_bins25kb_K27me3[ plus_one_targeted, ]$type2 <- "PcG targeted CGI + 1"
all_bins25kb_K27me3[ minus_one_targeted, ]$type2 <- "PcG targeted CGI - 1"
all_bins25kb_K27me3[ is.na(all_bins25kb_K27me3$type2), ]$type2 <- "Rest of the genome"  

all_bins25kb_K27me3$type2 <- factor(all_bins25kb_K27me3$type2,
                                   levels = c(
                                     "PcG targeted CGI", 
                                     "PcG targeted CGI + 1",
                                     "PcG targeted CGI - 1",
                                     "Rest of the genome") )
table(all_bins25kb_K27me3$type2)

    PcG targeted CGI PcG targeted CGI + 1 PcG targeted CGI - 1 
                4189                 2693                 2891 
  Rest of the genome 
              984638 

Log10 transform the bins signal.

Code
all_bins25kb_K27me3 |>
  column_to_rownames(var = "bin") |>
  select(chr, start, end, WT, CSex4, dex4, KO, type, type2) |>
  mutate( across( where(is.double), log10 ) ) |>
  # arrange is used to favour the CGI point to appear later in the plot and therefore be above the other points.
  arrange( desc(type2) ) -> log_bins25kb_K27me3

Check total number of bin.

Code
num_all_bins_2.5kb <- nrow(log_bins25kb_K27me3)
message("Number of bins of 2.5kb width to plot: ",  num_all_bins_2.5kb)
Number of bins of 2.5kb width to plot: 994411

Individual bin types numbers:

Code
targeted_nBins <- table(log_bins25kb_K27me3$type2)[names(table(log_bins25kb_K27me3$type2))=="PcG targeted CGI"]
plus_nBins <- table(log_bins25kb_K27me3$type2)[names(table(log_bins25kb_K27me3$type2))=="PcG targeted CGI + 1"]
minus_nBins <- table(log_bins25kb_K27me3$type2)[names(table(log_bins25kb_K27me3$type2))=="PcG targeted CGI - 1"]
flanking_nBins <- sum(plus_nBins, minus_nBins)
ROTG_nBins <- table(log_bins25kb_K27me3$type2)[names(table(log_bins25kb_K27me3$type2))=="Rest of the genome"]

message("Bins containing a targeted CGI: ", targeted_nBins, "\n",
        "Bins flanking a targeted CGI: ", flanking_nBins, "\n",
        "Bins in the rest of genome: ", ROTG_nBins)
Bins containing a targeted CGI: 4189
Bins flanking a targeted CGI: 5584
Bins in the rest of genome: 984638

3.1.1 Scatter plot ∆ex4 vs WT

Fit a linear model between different conditions.

Code
intercet_wt_dex4 <- coef( lm(log_bins25kb_K27me3$dex4 ~ log_bins25kb_K27me3$WT ) )[1] 
slope_wt_dex4 <- coef( lm(log_bins25kb_K27me3$dex4 ~ log_bins25kb_K27me3$WT ) )[2] 

If I were to plot all bins signal as a scatter plot of WT vs ∆ex4 I would do something like this:

Code
log_bins25kb_K27me3 |>
  ggplot() +
  aes(x = WT, y = dex4, colour = type2) +
  # facet_wrap(~type2, scales = 'fixed') +
  geom_point(size = 0.75) +
  geom_abline(slope = 1, intercept = 0, linewidth = 0.2) +
  geom_abline(slope = slope_wt_dex4, intercept = intercet_wt_dex4, linetype = 'dashed', linewidth = 0.2) +
  coord_fixed(ratio = 1, xlim = c(0, 3.5), ylim = c(0, 3.5) ) +
  scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
  scale_y_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
  scale_colour_manual(values = c("PcG targeted CGI - 1" = '#203059', 
                                 "PcG targeted CGI + 1" = '#203059',
                                 "PcG targeted CGI" = '#A2234C',
                                 "Rest of the genome" = '#72817F') ) +
  labs(x = expression("Spike-in normalized WT H3K27me3 signal (" ~ log[10] ~ ")" ), 
       y = expression("Spike-in normalized ∆ex4 H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
  scatter_th -> pBins_WT_vs_dex4_All_Targeted

# do not plot cause it's 2 many points
# pBins_WT_vs_dex4_All_Targeted
ggsave(path = pdf_dir_fig5,
       filename = paste0("Fig5B_CGI_coloured_WT_vs_Dex4_bins_n", num_all_bins_2.5kb, ".pdf"),
       plot = pBins_WT_vs_dex4_All_Targeted, device = cairo_pdf, units = "cm",
       width = 4.5, height = 4.5)

However that breaks the RStudio graphic engine.

To overcome some of the overplotting I remove points that are identical by type2 value. If 2 point have the exact same values, the duplicated point is removed from the dataframe. This helps removing points that are going to be plotted on top of each other, making the plot more manageable. This operation requires the use of the dataframe row names.

Start by removing the many “rest of the genome” bins

Code
indx_dups_ROTG_wt_dex4 <-
  which(duplicated(log_bins25kb_K27me3[log_bins25kb_K27me3$type2 == "Rest of the genome"  , c("WT", "dex4")]))
row_names_dups_ROTG <- rownames(log_bins25kb_K27me3[indx_dups_ROTG_wt_dex4, ]) 
wt_dex4_bins <- log_bins25kb_K27me3[!rownames(log_bins25kb_K27me3) %in% row_names_dups_ROTG,]

Then removing the “PcG targeted CGI”

Code
indx_dups_PTC_wt_dex4 <- which(duplicated(wt_dex4_bins[wt_dex4_bins$type2 == "PcG targeted CGI", c("WT", "dex4")] ) )
row_names_dups_PTC <- rownames(wt_dex4_bins[indx_dups_PTC_wt_dex4, ]) 
wt_dex4_bins <- wt_dex4_bins[!rownames(wt_dex4_bins) %in% row_names_dups_PTC,]

Lastly the +1 and -1 bins

Code
indx_dups_P1M1_wt_dex4 <- which(duplicated(wt_dex4_bins[wt_dex4_bins$type2 %in% c("PcG targeted CGI + 1", "PcG targeted CGI + 1"), c("WT", "dex4")] ) )
row_names_dups_P1M1 <- rownames(wt_dex4_bins[indx_dups_P1M1_wt_dex4, ]) 
wt_dex4_bins <- wt_dex4_bins[!rownames(wt_dex4_bins) %in% row_names_dups_P1M1,]

Re-calculate bin numbers now after filtering

Code
num_fltrd_bins_2.5kb <- nrow(wt_dex4_bins)
message("Number of bins of 2.5kb width to plot: ",  num_fltrd_bins_2.5kb)
Number of bins of 2.5kb width to plot: 16662

Plot figure 5C.

Code
ggplot(wt_dex4_bins) +
  aes(x = WT, y = dex4, colour = type2) +
  # facet_wrap(~type2, scales = 'fixed') +
  geom_point(size = 0.75, alpha = 0.5, stroke = 0) +
  geom_abline(slope = 1, intercept = 0, linewidth = 0.2) +
  geom_abline(slope = slope_wt_dex4, intercept = intercet_wt_dex4, linetype = 'dashed', linewidth = 0.2) +
  coord_fixed(ratio = 1, xlim = c(0, 3.5), ylim = c(0, 3.5), clip = 'on' ) +
  scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
  scale_y_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
  scale_colour_manual(values = c("PcG targeted CGI" = '#A2234C',
                                 "PcG targeted CGI - 1" = '#203059', 
                                 "PcG targeted CGI + 1" = '#203059',
                                 "Rest of the genome" = '#72817F'), 
                      breaks = c("PcG targeted CGI", 
                                 "PcG targeted CGI + 1",
                                 "Rest of the genome"),
                      labels = c("PcG targeted CGI" = "Targeted", 
                                  "PcG targeted CGI + 1" = "Flanking",
                                 "Rest of the genome" = "Rest of genome") ) +
  labs(x = expression("Spike-in normalized WT H3K27me3 signal (" ~ log[10] ~ ")" ), 
       y = expression("Spike-in normalized ∆ex4 H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.99))) +
  scatter_th -> pBins_WT_vs_dex4_Filtered_Targeted

Save to pdf.

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("Fig5C_CGI_coloured_WT_vs_Dex4_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
       plot = pBins_WT_vs_dex4_Filtered_Targeted, device = cairo_pdf, units = "cm",
       width = 4.2, height = 4.2)
Note

The scatter plot contains too many points to be efficiently plotted. For an effective scatter plot visualisation I rasterise only the specific points layer of the ggplot2 plot and keep all other labels and text in vector format.

Code
rBins_WT_vs_dex4_Filtered_Targeted <- rasterize(pBins_WT_vs_dex4_Filtered_Targeted, layers = 'Point', dpi = 400)

Plot rasterised scatter plot for figure 5C

Code
rBins_WT_vs_dex4_Filtered_Targeted

Save to pdf.

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("Fig5C_RASTERIZED_CGI_coloured_WT_vs_Dex4_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
       plot = rBins_WT_vs_dex4_Filtered_Targeted, device = cairo_pdf, units = "cm",
       width = 4.2, height = 4.2)

Now do the same but for CSex4 vs WT.

3.1.2 Scatter plot CSex4 vs WT

Get linear fit.

Code
intercet_wt_CSex4 <- coef( lm(log_bins25kb_K27me3$CSex4 ~ log_bins25kb_K27me3$WT ) )[1] 
slope_wt_CSex4 <- coef( lm(log_bins25kb_K27me3$CSex4 ~ log_bins25kb_K27me3$WT ) )[2] 

Filter the bins like before. Start by removing the many “rest of the genome” bins

Code
indx_dups_ROTG_wt_CSex4 <- which(duplicated(log_bins25kb_K27me3[log_bins25kb_K27me3$type2 == "Rest of the genome", c("WT", "CSex4")]))
row_names_dups_ROTG <- rownames(log_bins25kb_K27me3[indx_dups_ROTG_wt_CSex4, ]) 
wt_CSex4_bins <- log_bins25kb_K27me3[!rownames(log_bins25kb_K27me3) %in% row_names_dups_ROTG,]

Filter the “PcG targeted CGI” in CSex4 vs WT dataset

Code
indx_dups_PTC_wt_CSex4 <- which(duplicated(wt_CSex4_bins[wt_CSex4_bins$type2 == "PcG targeted CGI", c("WT", "CSex4")] ) )
row_names_dups_PTC <- rownames(wt_CSex4_bins[indx_dups_PTC_wt_CSex4, ]) 
wt_CSex4_bins <- wt_CSex4_bins[!rownames(wt_CSex4_bins) %in% row_names_dups_PTC,]

And the +1 and -1 flanking bins

Code
indx_dups_P1M1_wt_CSex4 <- which(duplicated(wt_CSex4_bins[wt_CSex4_bins$type2 %in% c("PcG targeted CGI + 1", "PcG targeted CGI + 1"), c("WT", "CSex4")] ) )
row_names_dups_P1M1 <- rownames(wt_CSex4_bins[indx_dups_P1M1_wt_CSex4, ]) 
wt_CSex4_bins <- wt_CSex4_bins[!rownames(wt_CSex4_bins) %in% row_names_dups_P1M1,]

Check number of bins after filtering

Code
num_fltrd_bins_2.5kb <- nrow(wt_CSex4_bins)
message("Number of bins of 2.5kb width to plot: ", num_fltrd_bins_2.5kb)
Number of bins of 2.5kb width to plot: 16189

Basically this filtering removes duplicated points.

Code
ggplot(wt_CSex4_bins) +
  aes(x = WT, y = CSex4, colour = type2) +
  geom_point(size = 0.75, alpha = 0.5, stroke = 0) +
  geom_abline(slope = 1, intercept = 0, linewidth = 0.2) +
  geom_abline(slope = slope_wt_CSex4, intercept = intercet_wt_CSex4, 
              linetype = 'dashed', linewidth = 0.2) +
  coord_fixed(ratio = 1, xlim = c(0, 3.5), ylim = c(0, 3.5), clip = 'on' ) +
  scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
  scale_y_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
  scale_colour_manual(values = c("PcG targeted CGI" = '#A2234C',
                                 "PcG targeted CGI - 1" = '#203059', 
                                 "PcG targeted CGI + 1" = '#203059',
                                 "Rest of the genome" = '#72817F'), 
                      breaks = c("PcG targeted CGI", 
                                 "PcG targeted CGI + 1",
                                 "Rest of the genome"),
                      labels = c("PcG targeted CGI" = "Targeted", 
                                 "PcG targeted CGI + 1" = "Flanking",
                                 "Rest of the genome" = "Rest of genome") ) +
  labs(x = expression("Spike-in normalized WT H3K27me3 signal (" ~ log[10] ~ ")" ), 
       y = expression("Spike-in normalized CSex4 H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.99))) +
  scatter_th -> pBins_WT_vs_CSex4_Filtered_Targeted

Save to pdf vector.

Code
ggsave(path = pdf_dir_fig5,  
       filename = paste0("Fig5B_CGI_coloured_WT_vs_CSex4_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
       plot = pBins_WT_vs_CSex4_Filtered_Targeted, device = cairo_pdf, units = "cm",
       width = 4.2, height = 4.2)

Rasterise only the point layer of ggplot

Code
rBins_WT_vs_CSex4_Filtered_Targeted <- rasterize(pBins_WT_vs_CSex4_Filtered_Targeted, layers = 'Point', dpi = 400)

Show plot as presented in figure 5B.

Code
rBins_WT_vs_CSex4_Filtered_Targeted

Save to pdf rasterised scatter plot.

Code
ggsave(path = pdf_dir_fig5,  
       filename = paste0("Fig5B_RASTERIZED_CGI_coloured_WT_vs_CSex4_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
       plot = rBins_WT_vs_CSex4_Filtered_Targeted, device = cairo_pdf, units = "cm",
       width = 4.2, height = 4.2)

3.2 Spike-in normalised H3K27me3 signal in 2.5kb genomic bins in KO rescues cell lines

Here I repeat the same analysis but for the SUZ12-KO isoform specific rescue cell lines.

Code
all_bins25kb_K27me3_rescues <- read_delim(
  file = file.path(round23_scatter_dir, 'H3K27me3_R3all--W2-S1-S3-L4-L6-L3D2-S3D2_bins_2.5Kb_yes.txt'),
                                  delim = ' ', progress = F,
                                  col_names = c('bin', 'WT', 'CSex4', 'dex4', 'KO',
                                                'WT2', 'KOrS1', 'KOrS3', 'KOrL4', 'KOrL6', 'KOrL3D2', 'KOrS3D2'),
                                  show_col_types = FALSE) |>
  mutate(chr = str_split_fixed(pattern = "\\*", string = bin, n = 3)[,1],
         start = str_split_fixed(pattern = "\\*", string = bin, n = 3)[,2],
         end = str_split_fixed(pattern = "\\*", string = bin, n = 3)[,3]) |>
  relocate(chr, start, end, .after = bin) 

The samples 'WT2', 'KOrS1', 'KOrS3', 'KOrL4', 'KOrL6', 'KOrL3D2', 'KOrS3D2' were done after in a second round compared to the first round of samples ('WT', 'CSex4', 'dex4', 'KO').

Check how many bins fall within a SUZ12 CGIs or other CGI

Code
table(all_bins25kb_K27me3_rescues$bin %in% bins25kbK27me3_CGI_SUZ12$bin)

 FALSE   TRUE 
987212   4189 
Code
table(all_bins25kb_K27me3_rescues$bin %in% bins25kbK27me3_other_CGI$bin)

 FALSE   TRUE 
978601  12800 

Annotate rescues bins that fall in targeted CGIs.

Code
all_bins25kb_K27me3_rescues |>
  mutate(type = case_when(bin %in% bins25kbK27me3_CGI_SUZ12$bin ~ "PcG targeted CGI",
                          bin %in% bins25kbK27me3_other_CGI$bin ~ "Other CGI") ) |>
  mutate(type = ifelse(is.na(type), yes = "Rest of the genome", no = type )
  ) -> all_bins25kb_K27me3_rescues
Code
all_bins25kb_K27me3_rescues$type <- factor(all_bins25kb_K27me3_rescues$type,
                                   levels = c("PcG targeted CGI", "Other CGI", "Rest of the genome") )

Same as before identify the CGI flanking bins.

First the downstream +1 bins.

Code
indx_targeted <- which(all_bins25kb_K27me3_rescues$type == "PcG targeted CGI")

plus_one_targeted <- c()
for (i in 1:(length(indx_targeted)-1) ) {
  
  if ( (indx_targeted[i + 1] - indx_targeted[i]) == 1 ) {
    # Do no nothing
    plus_one_targeted[i] <- 0
    
  } else {
    plus_one_targeted[i] <- indx_targeted[i] + 1
  }
}

plus_one_targeted <- plus_one_targeted[plus_one_targeted != 0]

Then the upstream -1 bins

Code
minus_one_targeted <- c()
for (i in 2:(length(indx_targeted)) ) {
  
  if ( ( indx_targeted[i] - indx_targeted[i - 1] ) == 1 ) {
    # Do no nothing
    minus_one_targeted[i] <- 0
    
  } else {
    minus_one_targeted[i] <- indx_targeted[i] - 1
  }
}

minus_one_targeted[which(  is.na(minus_one_targeted) )] <- indx_targeted[1] - 1
minus_one_targeted <- minus_one_targeted[minus_one_targeted != 0]

Create a new bin type variable called type2 also for rescue data.

Code
all_bins25kb_K27me3_rescues$type2  <- NA
all_bins25kb_K27me3_rescues[ indx_targeted, ]$type2 <- "PcG targeted CGI"
all_bins25kb_K27me3_rescues[ plus_one_targeted, ]$type2 <- "PcG targeted CGI + 1"
all_bins25kb_K27me3_rescues[ minus_one_targeted, ]$type2 <- "PcG targeted CGI - 1"
all_bins25kb_K27me3_rescues[ is.na(all_bins25kb_K27me3_rescues$type2), ]$type2 <- "Rest of the genome"  

all_bins25kb_K27me3_rescues$type2 <- factor(all_bins25kb_K27me3_rescues$type2,
                                    levels = c(
                                      "PcG targeted CGI", 
                                      "PcG targeted CGI + 1",
                                      "PcG targeted CGI - 1",
                                      "Rest of the genome") )
table(all_bins25kb_K27me3_rescues$type2)

    PcG targeted CGI PcG targeted CGI + 1 PcG targeted CGI - 1 
                4189                 2693                 2890 
  Rest of the genome 
              981629 

Log10 transform the bins signal in rescue data.

Code
all_bins25kb_K27me3_rescues |>
  column_to_rownames(var = "bin") |>
  select(chr, start, end, WT2, KOrS1, KOrS3, KOrL4, KOrL6, KOrL3D2, KOrS3D2, type, type2) |>
  mutate( across( where(is.double), log10 ) ) |>
  # arrange is used to favour the CGI point to appear later in the plot and therefore be above the other points.
  arrange( desc(type2) ) -> log_bins25kb_K27me3_rescue

Check total number of bin in rescues.

Code
num_all_bins_2.5kb <- nrow(log_bins25kb_K27me3_rescue)
message("Number of bins of 2.5kb width to plot: ",  num_all_bins_2.5kb)
Number of bins of 2.5kb width to plot: 991401

Individual bin types numbers in rescues:

Code
targeted_nBins <- table(log_bins25kb_K27me3_rescue$type2)[names(table(log_bins25kb_K27me3_rescue$type2))=="PcG targeted CGI"]
plus_nBins <- table(log_bins25kb_K27me3_rescue$type2)[names(table(log_bins25kb_K27me3_rescue$type2))=="PcG targeted CGI + 1"]
minus_nBins <- table(log_bins25kb_K27me3_rescue$type2)[names(table(log_bins25kb_K27me3_rescue$type2))=="PcG targeted CGI - 1"]
flanking_nBins <- sum(plus_nBins, minus_nBins)
ROTG_nBins <- table(log_bins25kb_K27me3_rescue$type2)[names(table(log_bins25kb_K27me3_rescue$type2))=="Rest of the genome"]

message("Bins containing a targeted CGI: ", targeted_nBins, "\n",
        "Bins flanking a targeted CGI: ", flanking_nBins, "\n",
        "Bins in the rest of genome: ", ROTG_nBins)
Bins containing a targeted CGI: 4189
Bins flanking a targeted CGI: 5583
Bins in the rest of genome: 981629

3.2.1 Scatter plot KO+SUZ12-S vs WT

Fit a linear model between different conditions.

Code
intercet_wt_KOrS1 <- coef( lm(log_bins25kb_K27me3_rescue$KOrS1 ~ log_bins25kb_K27me3_rescue$WT2 ) )[1] 
slope_wt_KOrS1 <- coef( lm(log_bins25kb_K27me3_rescue$KOrS1 ~ log_bins25kb_K27me3_rescue$WT2 ) )[2] 

intercet_wt_KOrS3 <- coef( lm(log_bins25kb_K27me3_rescue$KOrS3 ~ log_bins25kb_K27me3_rescue$WT2 ) )[1] 
slope_wt_KOrS3 <- coef( lm(log_bins25kb_K27me3_rescue$KOrS3 ~ log_bins25kb_K27me3_rescue$WT2 ) )[2] 

To overcome some of the overplotting I remove points that are identical by type2 value. If 2 point have the exact same values, the duplicated point is removed from the dataframe.

Start by removing the many “rest of the genome” bins

Code
indx_dups_ROTG_wt_KOrS1 <-
  which(duplicated(log_bins25kb_K27me3_rescue[log_bins25kb_K27me3_rescue$type2 == "Rest of the genome"  , c("WT2", "KOrS1")]))
row_names_dups_ROTG <- rownames(log_bins25kb_K27me3_rescue[indx_dups_ROTG_wt_KOrS1, ]) 
wt_KOrS1_bins <- log_bins25kb_K27me3_rescue[!rownames(log_bins25kb_K27me3_rescue) %in% row_names_dups_ROTG,]

Then removing the “PcG targeted CGI”

Code
indx_dups_PTC_wt_KOrS1 <- which(duplicated(wt_KOrS1_bins[wt_KOrS1_bins$type2 == "PcG targeted CGI", c("WT2", "KOrS1")] ) )
row_names_dups_PTC <- rownames(wt_KOrS1_bins[indx_dups_PTC_wt_KOrS1, ]) 
wt_KOrS1_bins <- wt_KOrS1_bins[!rownames(wt_KOrS1_bins) %in% row_names_dups_PTC,]

Lastly the +1 and -1 bins

Code
indx_dups_P1M1_wt_KOrS1 <- which(duplicated(wt_KOrS1_bins[wt_KOrS1_bins$type2 %in% c("PcG targeted CGI + 1", "PcG targeted CGI + 1"), c("WT2", "KOrS1")] ) )
row_names_dups_P1M1 <- rownames(wt_KOrS1_bins[indx_dups_P1M1_wt_KOrS1, ]) 
wt_KOrS1_bins <- wt_KOrS1_bins[!rownames(wt_KOrS1_bins) %in% row_names_dups_P1M1,]

Re-calculate bin numbers now after filtering

Code
num_fltrd_bins_2.5kb <- nrow(wt_KOrS1_bins)
message("Number of bins of 2.5kb width to plot: ",  num_fltrd_bins_2.5kb)
Number of bins of 2.5kb width to plot: 16948

Plot figure 5E

Code
ggplot(wt_KOrS1_bins) +
  aes(x = WT2, y = KOrS1, colour = type2) +
  # facet_wrap(~type2, scales = 'fixed') +
  geom_point(size = 0.75, alpha = 0.5, stroke = 0) +
  geom_abline(slope = 1, intercept = 0, linewidth = 0.2) +
  geom_abline(slope = slope_wt_KOrS1, intercept = intercet_wt_KOrS1, linetype = 'dashed', linewidth = 0.2) +
  coord_fixed(ratio = 1, xlim = c(0, 3.5), ylim = c(0, 3.5), clip = 'on' ) +
  scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
  scale_y_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
  scale_colour_manual(values = c("PcG targeted CGI" = '#A2234C',
                                 "PcG targeted CGI - 1" = '#203059', 
                                 "PcG targeted CGI + 1" = '#203059',
                                 "Rest of the genome" = '#72817F'), 
                      breaks = c("PcG targeted CGI", 
                                 "PcG targeted CGI + 1",
                                 "Rest of the genome"),
                      labels = c("PcG targeted CGI" = "Targeted", 
                                  "PcG targeted CGI + 1" = "Flanking",
                                 "Rest of the genome" = "Rest of genome") ) +
  labs(x = expression("Spike-in normalized WT H3K27me3 signal (" ~ log[10] ~ ")" ), 
       y = expression("Spike-in normalized KO+S H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.99))) +
  scatter_th -> pBins_WT_vs_KOrS1_Filtered_Targeted
pBins_WT_vs_KOrS1_Filtered_Targeted

Save to pdf.

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("Fig5E_CGI_coloured_WT_vs_KOrS1_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
       plot = pBins_WT_vs_KOrS1_Filtered_Targeted, device = cairo_pdf, units = "cm",
       width = 4.2, height = 4.2)

Rasterise the points to 400dpi

Code
rBins_WT_vs_KOrS1_Filtered_Targeted <- rasterize(pBins_WT_vs_KOrS1_Filtered_Targeted, layers = 'Point', dpi = 400)

Plot rasterised scatter plot for figure 3B

Code
rBins_WT_vs_KOrS1_Filtered_Targeted

Save to pdf.

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("Fig5E_RASTERIZED_CGI_coloured_WT_vs_KOrS1_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
       plot = rBins_WT_vs_KOrS1_Filtered_Targeted, device = cairo_pdf, units = "cm",
       width = 4.2, height = 4.2)

Now for the KO+L (KOrL4).

3.2.2 Scatter plot KO+SUZ12-L vs WT

Get linear fit.

Code
intercet_wt_KOrL4 <- coef( lm(log_bins25kb_K27me3_rescue$KOrL4 ~ log_bins25kb_K27me3_rescue$WT2 ) )[1] 
slope_wt_KOrL4 <- coef( lm(log_bins25kb_K27me3_rescue$KOrL4 ~ log_bins25kb_K27me3_rescue$WT2 ) )[2] 

intercet_wt_KOrL6 <- coef( lm(log_bins25kb_K27me3_rescue$KOrL6 ~ log_bins25kb_K27me3_rescue$WT2 ) )[1] 
slope_wt_KOrL6 <- coef( lm(log_bins25kb_K27me3_rescue$KOrL6 ~ log_bins25kb_K27me3_rescue$WT2 ) )[2] 

Check linear fit parameters

Code
message("WT vs KO+L4: intercept = ", round(intercet_wt_KOrL4, 3), " slope = ", round(slope_wt_KOrL4, 3))
WT vs KO+L4: intercept = 0 slope = 0.824
Code
message("WT vs KO+L6: intercept = ", round(intercet_wt_KOrL6, 3), " slope = ", round(slope_wt_KOrL6, 3))
WT vs KO+L6: intercept = 0.222 slope = 0.772

Filter the bins like before. Start by removing the many “rest of the genome” bins

Code
indx_dups_ROTG_wt_KOrL4 <- which(duplicated(log_bins25kb_K27me3_rescue[log_bins25kb_K27me3_rescue$type2 == "Rest of the genome", c("WT2", "KOrL4")]))
row_names_dups_ROTG <- rownames(log_bins25kb_K27me3_rescue[indx_dups_ROTG_wt_KOrL4, ]) 
wt_KOrL4_bins <- log_bins25kb_K27me3_rescue[!rownames(log_bins25kb_K27me3_rescue) %in% row_names_dups_ROTG,]

Filter the “PcG targeted CGI” in KO rescue Long clone 4 vs WT dataset

Code
indx_dups_PTC_wt_KOrL4 <- which(duplicated(wt_KOrL4_bins[wt_KOrL4_bins$type2 == "PcG targeted CGI", c("WT2", "KOrL4")] ) )
row_names_dups_PTC <- rownames(wt_KOrL4_bins[indx_dups_PTC_wt_KOrL4, ]) 
wt_KOrL4_bins <- wt_KOrL4_bins[!rownames(wt_KOrL4_bins) %in% row_names_dups_PTC,]

And the +1 and -1 flanking bins

Code
indx_dups_P1M1_wt_KOrL4 <- which(duplicated(wt_KOrL4_bins[wt_KOrL4_bins$type2 %in% c("PcG targeted CGI + 1", "PcG targeted CGI + 1"), c("WT2", "KOrL4")] ) )
row_names_dups_P1M1 <- rownames(wt_KOrL4_bins[indx_dups_P1M1_wt_KOrL4, ]) 
wt_KOrL4_bins <- wt_KOrL4_bins[!rownames(wt_KOrL4_bins) %in% row_names_dups_P1M1,]

Check number of bins after filtering:

Code
num_fltrd_bins_2.5kb <- nrow(wt_KOrL4_bins)
message("Number of bins of 2.5kb width to plot: ", num_fltrd_bins_2.5kb)
Number of bins of 2.5kb width to plot: 14071

Plot figure 5D

Code
ggplot(wt_KOrL4_bins) +
  aes(x = WT2, y = KOrL4, colour = type2) +
  geom_point(size = 0.75, alpha = 0.5, stroke = 0) +
  geom_abline(slope = 1, intercept = 0, linewidth = 0.2) +
  geom_abline(slope = slope_wt_KOrL4, intercept = intercet_wt_KOrL4, 
              linetype = 'dashed', linewidth = 0.2) +
  coord_fixed(ratio = 1, xlim = c(0, 3.5), ylim = c(0, 3.5), clip = 'on' ) +
  scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
  scale_y_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
  scale_colour_manual(values = c("PcG targeted CGI" = '#A2234C',
                                 "PcG targeted CGI - 1" = '#203059', 
                                 "PcG targeted CGI + 1" = '#203059',
                                 "Rest of the genome" = '#72817F'), 
                      breaks = c("PcG targeted CGI", 
                                 "PcG targeted CGI + 1",
                                 "Rest of the genome"),
                      labels = c("PcG targeted CGI" = "Targeted", 
                                 "PcG targeted CGI + 1" = "Flanking",
                                 "Rest of the genome" = "Rest of genome") ) +
  labs(x = expression("Spike-in normalized WT H3K27me3 signal (" ~ log[10] ~ ")" ), 
       y = expression("Spike-in normalized KO+L H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.99))) +
  scatter_th -> pBins_WT_vs_KOrL4_Filtered_Targeted
pBins_WT_vs_KOrL4_Filtered_Targeted

Save to pdf vector.

Code
ggsave(path = pdf_dir_fig5,  
       filename = paste0("Fig5D_CGI_coloured_WT_vs_KOrL4_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
       plot = pBins_WT_vs_KOrL4_Filtered_Targeted, device = cairo_pdf, units = "cm",
       width = 4.2, height = 4.2)

Rasterise only the point layer of ggplot

Code
rBins_WT_vs_KOrL4_Filtered_Targeted <- rasterize(pBins_WT_vs_KOrL4_Filtered_Targeted, layers = 'Point', dpi = 400)

Show plot as presented in figure.

Code
rBins_WT_vs_KOrL4_Filtered_Targeted

Save to pdf rasterised scatter plot.

Code
ggsave(path = pdf_dir_fig5,  
       filename = paste0("Fig5D_RASTERIZED_CGI_coloured_WT_vs_KOrL4_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
       plot = rBins_WT_vs_KOrL4_Filtered_Targeted, device = cairo_pdf, units = "cm",
       width = 4.2, height = 4.2)

3.2.3 Scatter plot KO+SUZ12-S3D vs WT

SUZ12-S 3D mutant H3K27me3 scatter plot.

Get linear fit.

Code
intercet_wt_KOrS3D2 <- coef( lm(log_bins25kb_K27me3_rescue$KOrS3D2 ~ log_bins25kb_K27me3_rescue$WT2 ) )[1] 
slope_wt_KOrS3D2 <- coef( lm(log_bins25kb_K27me3_rescue$KOrS3D2 ~ log_bins25kb_K27me3_rescue$WT2 ) )[2] 

Check linear fit parameters

Code
message("WT2 vs KOrS3D2: intercept = ", round(intercet_wt_KOrS3D2, 3), 
        " slope = ", round(slope_wt_KOrS3D2, 3))
WT2 vs KOrS3D2: intercept = 0.173 slope = 0.809

Filter the bins like before. Start by removing the many “rest of the genome” bins

Code
indx_dups_ROTG_wt_KOrS3D2 <- which(duplicated(log_bins25kb_K27me3_rescue[log_bins25kb_K27me3_rescue$type2 == "Rest of the genome", c("WT2", "KOrS3D2")]))
row_names_dups_ROTG <- rownames(log_bins25kb_K27me3_rescue[indx_dups_ROTG_wt_KOrS3D2, ]) 
wt_KOrS3D2_bins <- log_bins25kb_K27me3_rescue[!rownames(log_bins25kb_K27me3_rescue) %in% row_names_dups_ROTG,]

Filter the “PcG targeted CGI” in KO rescue SUZ12-Short 3D mutant vs WT dataset

Code
indx_dups_PTC_wt_KOrS3D2 <- which(duplicated(wt_KOrS3D2_bins[wt_KOrS3D2_bins$type2 == "PcG targeted CGI", c("WT2", "KOrS3D2")] ) )
row_names_dups_PTC <- rownames(wt_KOrS3D2_bins[indx_dups_PTC_wt_KOrS3D2, ]) 
wt_KOrS3D2_bins <- wt_KOrS3D2_bins[!rownames(wt_KOrS3D2_bins) %in% row_names_dups_PTC,]

And the +1 and -1 flanking bins

Code
indx_dups_P1M1_wt_KOrS3D2 <- which(duplicated(wt_KOrS3D2_bins[wt_KOrS3D2_bins$type2 %in% c("PcG targeted CGI + 1", "PcG targeted CGI + 1"), c("WT2", "KOrS3D2")] ) )
row_names_dups_P1M1 <- rownames(wt_KOrS3D2_bins[indx_dups_P1M1_wt_KOrS3D2, ]) 
wt_KOrS3D2_bins <- wt_KOrS3D2_bins[!rownames(wt_KOrS3D2_bins) %in% row_names_dups_P1M1,]

Check number of bins after filtering:

Code
num_fltrd_bins_2.5kb <- nrow(wt_KOrS3D2_bins)
message("Number of bins of 2.5kb width to plot: ", num_fltrd_bins_2.5kb)
Number of bins of 2.5kb width to plot: 12240

Plot figure S5G

Code
ggplot(wt_KOrS3D2_bins) +
  aes(x = WT2, y = KOrS3D2, colour = type2) +
  geom_point(size = 0.75, alpha = 0.5, stroke = 0) +
  geom_abline(slope = 1, intercept = 0, linewidth = 0.2) +
  geom_abline(slope = slope_wt_KOrS3D2, intercept = intercet_wt_KOrS3D2, 
              linetype = 'dashed', linewidth = 0.2) +
  coord_fixed(ratio = 1, xlim = c(0, 3.5), ylim = c(0, 3.5), clip = 'on' ) +
  scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
  scale_y_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
  scale_colour_manual(values = c("PcG targeted CGI" = '#A2234C',
                                 "PcG targeted CGI - 1" = '#203059', 
                                 "PcG targeted CGI + 1" = '#203059',
                                 "Rest of the genome" = '#72817F'), 
                      breaks = c("PcG targeted CGI", 
                                 "PcG targeted CGI + 1",
                                 "Rest of the genome"),
                      labels = c("PcG targeted CGI" = "Targeted", 
                                 "PcG targeted CGI + 1" = "Flanking",
                                 "Rest of the genome" = "Rest of genome") ) +
  labs(x = expression("Spike-in normalized WT2 H3K27me3 signal (" ~ log[10] ~ ")" ), 
       y = expression("Spike-in normalized KO+S-3D H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.99))) +
  scatter_th -> pBins_WT_vs_KOrS3D2_Filtered_Targeted
pBins_WT_vs_KOrS3D2_Filtered_Targeted

Save to pdf vector.

Code
ggsave(path = pdf_dir_fig5,  
       filename = paste0("FigS5G_CGI_coloured_WT_vs_KOrS3D2_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
       plot = pBins_WT_vs_KOrS3D2_Filtered_Targeted, device = cairo_pdf, units = "cm",
       width = 4.2, height = 4.2)

Rasterise only the point layer of ggplot

Code
rBins_WT_vs_KOrS3D2_Filtered_Targeted <- rasterize(pBins_WT_vs_KOrS3D2_Filtered_Targeted,
                                                   layers = 'Point', dpi = 400)

Show plot as presented in figure.

Code
rBins_WT_vs_KOrS3D2_Filtered_Targeted

Save to pdf rasterised scatter plot.

Code
ggsave(path = pdf_dir_fig5,  
       filename = paste0("FigS5G_RASTERIZED_CGI_coloured_WT_vs_KOrS3D2_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
       plot = rBins_WT_vs_KOrS3D2_Filtered_Targeted, device = cairo_pdf, units = "cm",
       width = 4.2, height = 4.2)

Show the same plot but without the log10 scaling of the data.

Code
ggplot(wt_KOrS3D2_bins) +
  aes(x = 10^WT2, y = 10^KOrS3D2, colour = type2) +
  geom_point(size = 0.75, alpha = 0.5, stroke = 0) +
  geom_abline(slope = 1, intercept = 0, linewidth = 0.2) +
  geom_abline(slope = slope_wt_KOrS3D2, intercept = intercet_wt_KOrS3D2, 
              linetype = 'dashed', linewidth = 0.2) +
  coord_fixed(ratio = 1, xlim = c(0, 150), ylim = c(0, 150), clip = 'on' ) +
  scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
  scale_y_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
  scale_colour_manual(values = c("PcG targeted CGI" = '#A2234C',
                                 "PcG targeted CGI - 1" = '#203059', 
                                 "PcG targeted CGI + 1" = '#203059',
                                 "Rest of the genome" = '#72817F'), 
                      breaks = c("PcG targeted CGI", 
                                 "PcG targeted CGI + 1",
                                 "Rest of the genome"),
                      labels = c("PcG targeted CGI" = "Targeted", 
                                 "PcG targeted CGI + 1" = "Flanking",
                                 "Rest of the genome" = "Rest of genome") ) +
  labs(x = expression("Spike-in normalized WT#2 H3K27me3 signal" ), 
       y = expression("Spike-in normalized KO+S-3D H3K27me3 signal" ) ) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.99))) +
  scatter_th +
  theme(legend.position = c(0.3, 0.92) ) -> pBins_WT_vs_KOrS3D2_Filtered_Targeted_NOLOG10
pBins_WT_vs_KOrS3D2_Filtered_Targeted_NOLOG10

Save to pdf vector.

Code
ggsave(path = pdf_dir_fig5,  
       filename = paste0("FigS5G_CGI_NoLog10_WT_vs_KOrS3D2_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
       plot = pBins_WT_vs_KOrS3D2_Filtered_Targeted_NOLOG10, device = cairo_pdf, units = "cm",
       width = 4.2, height = 4.2)

Rasterise only the point layer of ggplot

Code
rBins_WT_vs_KOrS3D2_NoLog10 <- rasterize(pBins_WT_vs_KOrS3D2_Filtered_Targeted_NOLOG10, 
                                                   layers = 'Point', dpi = 400)

Show plot as presented in figure.

Code
rBins_WT_vs_KOrS3D2_NoLog10

Save to pdf rasterised scatter plot.

Code
ggsave(path = pdf_dir_fig5,  
       filename = paste0("FigS5G_RASTERIZED_CGI_NoLog10_WT_vs_KOrS3D2_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
       plot = rBins_WT_vs_KOrS3D2_NoLog10, device = cairo_pdf, units = "cm",
       width = 4.2, height = 4.2)

3.2.4 Scatter plot KO+SUZ12-L3D vs WT

SUZ12-L 3D mutant H3K27me3 scatter plot.

Get linear fit.

Code
intercet_wt_KOrL3D2 <- coef( lm(log_bins25kb_K27me3_rescue$KOrL3D2 ~ log_bins25kb_K27me3_rescue$WT2 ) )[1] 
slope_wt_KOrL3D2 <- coef( lm(log_bins25kb_K27me3_rescue$KOrL3D2 ~ log_bins25kb_K27me3_rescue$WT2 ) )[2] 

Check linear fit parameters

Code
message("WT vs KO+L-3D: intercept = ", round(intercet_wt_KOrL3D2, 3), 
        " slope = ", round(slope_wt_KOrL3D2, 3))
WT vs KO+L-3D: intercept = 0.444 slope = 0.751

Filter the bins like before. Start by removing the many “rest of the genome” bins

Code
indx_dups_ROTG_wt_KOrL3D2 <- which(duplicated(log_bins25kb_K27me3_rescue[log_bins25kb_K27me3_rescue$type2 == "Rest of the genome", c("WT2", "KOrL3D2")]))
row_names_dups_ROTG <- rownames(log_bins25kb_K27me3_rescue[indx_dups_ROTG_wt_KOrL3D2, ]) 
wt_KOrL3D2_bins <- log_bins25kb_K27me3_rescue[!rownames(log_bins25kb_K27me3_rescue) %in% row_names_dups_ROTG,]

Filter the “PcG targeted CGI” in KO rescue SUZ12-Long 3D mutant vs WT dataset

Code
indx_dups_PTC_wt_KOrL3D2 <- which(duplicated(wt_KOrL3D2_bins[wt_KOrL3D2_bins$type2 == "PcG targeted CGI", c("WT2", "KOrL3D2")] ) )
row_names_dups_PTC <- rownames(wt_KOrL3D2_bins[indx_dups_PTC_wt_KOrL3D2, ]) 
wt_KOrL3D2_bins <- wt_KOrL3D2_bins[!rownames(wt_KOrL3D2_bins) %in% row_names_dups_PTC,]

And the +1 and -1 flanking bins

Code
indx_dups_P1M1_wt_KOrL3D2 <- which(duplicated(wt_KOrL3D2_bins[wt_KOrL3D2_bins$type2 %in% c("PcG targeted CGI + 1", "PcG targeted CGI + 1"), c("WT2", "KOrL3D2")] ) )
row_names_dups_P1M1 <- rownames(wt_KOrL3D2_bins[indx_dups_P1M1_wt_KOrS3D2, ]) 
wt_KOrL3D2_bins <- wt_KOrL3D2_bins[!rownames(wt_KOrL3D2_bins) %in% row_names_dups_P1M1,]

Check number of bins after filtering:

Code
num_fltrd_bins_2.5kb <- nrow(wt_KOrL3D2_bins)
message("Number of bins of 2.5kb width to plot: ", num_fltrd_bins_2.5kb)
Number of bins of 2.5kb width to plot: 14481

Plot figure S5F

Code
ggplot(wt_KOrL3D2_bins) +
  aes(x = WT2, y = KOrL3D2, colour = type2) +
  geom_point(size = 0.75, alpha = 0.5, stroke = 0) +
  geom_abline(slope = 1, intercept = 0, linewidth = 0.2) +
  geom_abline(slope = slope_wt_KOrL3D2, intercept = intercet_wt_KOrL3D2, 
              linetype = 'dashed', linewidth = 0.2) +
  coord_fixed(ratio = 1, xlim = c(0, 3.5), ylim = c(0, 3.5), clip = 'on' ) +
  scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
  scale_y_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
  scale_colour_manual(values = c("PcG targeted CGI" = '#A2234C',
                                 "PcG targeted CGI - 1" = '#203059', 
                                 "PcG targeted CGI + 1" = '#203059',
                                 "Rest of the genome" = '#72817F'), 
                      breaks = c("PcG targeted CGI", 
                                 "PcG targeted CGI + 1",
                                 "Rest of the genome"),
                      labels = c("PcG targeted CGI" = "Targeted", 
                                 "PcG targeted CGI + 1" = "Flanking",
                                 "Rest of the genome" = "Rest of genome") ) +
  labs(x = expression("Spike-in normalized WT#2 H3K27me3 signal (" ~ log[10] ~ ")" ), 
       y = expression("Spike-in normalized KO+L-3D H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.99))) +
  scatter_th -> pBins_WT_vs_KOrL3D2_Filtered_Targeted
pBins_WT_vs_KOrL3D2_Filtered_Targeted

Save to pdf vector.

Code
ggsave(path = pdf_dir_fig5,  
       filename = paste0("FigS5F_CGI_coloured_WT_vs_KOrL3D2_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
       plot = pBins_WT_vs_KOrL3D2_Filtered_Targeted, device = cairo_pdf, units = "cm",
       width = 4.2, height = 4.2)

Rasterise only the point layer of ggplot

Code
rBins_WT_vs_KOrL3D2_Filtered_Targeted <- rasterize(pBins_WT_vs_KOrL3D2_Filtered_Targeted,
                                                   layers = 'Point', dpi = 400)

Show plot as presented in figure.

Code
rBins_WT_vs_KOrL3D2_Filtered_Targeted

Save to pdf rasterised scatter plot.

Code
ggsave(path = pdf_dir_fig5,  
       filename = paste0("FigS5F_RASTERIZED_CGI_coloured_WT_vs_KOrL3D2_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
       plot = rBins_WT_vs_KOrL3D2_Filtered_Targeted, device = cairo_pdf, units = "cm",
       width = 4.2, height = 4.2)

Show the same plot but without the log10 scaling of the data.

Code
ggplot(wt_KOrL3D2_bins) +
  aes(x = 10^WT2, y = 10^KOrL3D2, colour = type2) +
  geom_point(size = 0.75, alpha = 0.5, stroke = 0) +
  geom_abline(slope = 1, intercept = 0, linewidth = 0.2) +
  geom_abline(slope = slope_wt_KOrS3D2, intercept = intercet_wt_KOrS3D2, 
              linetype = 'dashed', linewidth = 0.2) +
  coord_fixed(ratio = 1, xlim = c(0, 150), ylim = c(0, 150), clip = 'on' ) +
  scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
  scale_y_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
  scale_colour_manual(values = c("PcG targeted CGI" = '#A2234C',
                                 "PcG targeted CGI - 1" = '#203059', 
                                 "PcG targeted CGI + 1" = '#203059',
                                 "Rest of the genome" = '#72817F'), 
                      breaks = c("PcG targeted CGI", 
                                 "PcG targeted CGI + 1",
                                 "Rest of the genome"),
                      labels = c("PcG targeted CGI" = "Targeted", 
                                 "PcG targeted CGI + 1" = "Flanking",
                                 "Rest of the genome" = "Rest of genome") ) +
  labs(x = expression("Spike-in normalized WT#2 H3K27me3 signal" ), 
       y = expression("Spike-in normalized KO+L-3D H3K27me3 signal" ) ) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.99))) +
  scatter_th +
  theme(legend.position = c(0.27, 0.92) ) -> pBins_WT_vs_KOrL3D2_Filtered_Targeted_NOLOG10
pBins_WT_vs_KOrL3D2_Filtered_Targeted_NOLOG10

Save to pdf vector.

Code
ggsave(path = pdf_dir_fig5,  
       filename = paste0("FigS5F_CGI_NoLog10_WT_vs_KOrL3D2_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
       plot = pBins_WT_vs_KOrL3D2_Filtered_Targeted_NOLOG10, device = cairo_pdf, units = "cm",
       width = 4.2, height = 4.2)

Rasterise only the point layer of ggplot

Code
rBins_WT_vs_KOrL3D2_NoLog10 <- rasterize(pBins_WT_vs_KOrL3D2_Filtered_Targeted_NOLOG10, 
                                                   layers = 'Point', dpi = 400)

Show plot as presented in figure.

Code
rBins_WT_vs_KOrL3D2_NoLog10

Save to pdf rasterised scatter plot.

Code
ggsave(path = pdf_dir_fig5,  
       filename = paste0("FigS5F_RASTERIZED_CGI_NoLog10_WT_vs_KOrL3D2_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
       plot = rBins_WT_vs_KOrL3D2_NoLog10, device = cairo_pdf, units = "cm",
       width = 4.2, height = 4.2)

3.3 Boxplot H3K27me3 signal in 2.5kb genomic bins

Display the same H3K27me3 signal in 2.5kb genomic bins as boxplots.

Code below is needed to colour the ggplot2 facet strip text.

Code
# requires package ggh4x
CGI_coloured_strip <-
  strip_themed(background_x = elem_list_rect(fill = c(
    '#203059', '#A2234C', '#203059', '#72817F'
  )),
  text_x = elem_list_text(colour = c('white')))

strip_CGI_conversion <- c(
  `PcG targeted CGI - 1` = "Flanking up",
  `PcG targeted CGI` = "Targeted CGI",
  `PcG targeted CGI + 1` = "Flanking down",
  `Rest of the genome` = "Rest of genome"
)

Reshape the data for plotting.

Code
log_bins25kb_K27me3 |>
  as_tibble() |>
  pivot_longer(cols = c("WT", 'CSex4', 'dex4', 'KO'), names_to = 'Sample', values_to = "log10_H3K27me3") |>
  mutate(Sample = factor(Sample, levels = c("WT", 'CSex4', 'dex4', 'KO') ) ) |>
  mutate(type2 = factor(type2, levels = c('PcG targeted CGI - 1', 'PcG targeted CGI', 'PcG targeted CGI + 1', 'Rest of the genome') ) ) -> endogenous_bins

log_bins25kb_K27me3_rescue |>
  as_tibble() |>
  pivot_longer(cols = c("WT2", 'KOrL4', 'KOrL6', 'KOrS1', 'KOrS3', 'KOrL3D2', 'KOrS3D2'), 
               names_to = 'Sample', values_to = "log10_H3K27me3") |>
  mutate(Sample = factor(Sample, levels = c("WT2", 'KOrL4', 'KOrL6', 'KOrS1', 'KOrS3', 'KOrL3D2', 'KOrS3D2') ) ) |>
  mutate(type2 = factor(type2, 
                        levels = c('PcG targeted CGI - 1', 
                                   'PcG targeted CGI',
                                   'PcG targeted CGI + 1',
                                   'Rest of the genome') )
         ) -> rescue_bins

Plot boxplot for panel 5F without the KO rescue lines

Code
ggplot(endogenous_bins) +
  aes(x = Sample, y = log10_H3K27me3, fill = Sample) +
  facet_wrap2(~type2, nrow = 1, scales = 'fixed', strip = CGI_coloured_strip, 
              labeller = labeller(type2 = strip_CGI_conversion ) ) +
  geom_boxplot(outlier.shape = NA, linewidth = 0.2, show.legend = F) +
  scale_x_discrete(labels = c('dex4' = '∆ex4') ) +
  scale_y_continuous(expand = expansion(mult = 0, add = c(0.2, 0.2) ) ) +
  coord_cartesian(ylim = c(0, 3.5), clip = 'on') +
  scale_fill_manual(values = c('WT' = "#377AA3", 'CSex4' = "#B65120", 
                               'dex4' = "#F7CB48",'KO' = "#728189") ) +
  labs(y =  expression("Spike-in normalized H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
  boxplot_th -> pBoxPlot_CGI_quant_endogenous
pBoxPlot_CGI_quant_endogenous

Save to pdf.

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("Fig5F_2.5kb_Bins_Boxplot_endogenous_lines.pdf"),
       plot = pBoxPlot_CGI_quant_endogenous, device = cairo_pdf, units = "cm",
       width = 7.3, height = 5)

Now same plot with rescue lines

Code
ggplot(rescue_bins) +
  aes(x = Sample, y = log10_H3K27me3, fill = Sample) +
  facet_wrap2(~type2, nrow = 1, scales = 'fixed', strip = CGI_coloured_strip, 
              labeller = labeller(type2 = strip_CGI_conversion ) ) +
  geom_boxplot(outlier.shape = NA, linewidth = 0.2, show.legend = F) +
  scale_x_discrete(labels = c('WT2' = 'WT', 'KOrL4' = 'KO+L#1', 
                              'KOrL6' = 'KO+L#2', 'KOrS1' = 'KO+S#1', 
                              'KOrS3' = 'KO+S#2',
                              'KOrL3D2' = 'KO+L-3D', 'KOrS3D2' = 'KO+S-3D') ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.02), add = 0),
                     n.breaks = 6) +
  coord_cartesian(ylim = c(0, 2.25), clip = 'on') +
  scale_fill_manual(values = c('WT2' = "#377AA3",
                               "KOrL4" = "mediumpurple2", "KOrL6" = "mediumpurple3", 
                               "KOrS1" = "darkorange1",  "KOrS3" = "darkorange2",
                               "KOrL3D2" = "#74A57F", "KOrS3D2" = "#E8B4BC")
                    ) +
  labs(y =  expression("Spike-in normalized H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
  boxplot_th -> pBoxPlot_CGI_quant_rescues
pBoxPlot_CGI_quant_rescues

Save to pdf.

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("Fig5F_2.5kb_Bins_Boxplot_rescue_lines.pdf"),
       plot = pBoxPlot_CGI_quant_rescues, device = cairo_pdf, units = "cm",
       width = 7.5, height = 5)

Combine the 2 into one plot:

Code
rbind(endogenous_bins, rescue_bins) |>
  subset(! Sample %in% c('KOrL6', 'KOrS3') ) |> #'WT2', 
  ggplot() +
  aes(x = Sample, y = log10_H3K27me3, fill = Sample) +
  facet_wrap2(~type2, nrow = 1, scales = 'fixed', strip = CGI_coloured_strip, 
              labeller = labeller(type2 = strip_CGI_conversion ) ) +
  geom_boxplot(outlier.shape = NA, linewidth = 0.2, show.legend = F) +
  scale_x_discrete(labels = c('dex4' = '∆ex4', 'KOrL4' = 'KO+L', 
                              'KOrS1' = 'KO+S', 'KOrL3D2' = 'KO+L-3D',
                              'KOrS3D2' = 'KO+S-3D') ) +
  scale_y_continuous(expand = expansion(mult = 0, add = c(0.2, 0.2) ) ) +
  coord_cartesian(ylim = c(0, 3.5), clip = 'on') +
  scale_fill_manual(values = c('WT' = "#377AA3", 'WT2' = "#377AA3", 'CSex4' = "#B65120",
                               'dex4' = "#F7CB48",'KO' = "#728189",
                               "KOrL4" = "mediumpurple2", "KOrS1" = "darkorange1",
                               "KOrL3D2" = "#74A57F", "KOrS3D2" = "#E8B4BC") ) +
  labs(y =  expression("Spike-in normalized H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
  boxplot_th -> pBoxPlot_CGI_quant
pBoxPlot_CGI_quant

I think the fact that the spike-in normalisation is was done with 2 different batches might explain with the KO rescues are lower than the KO.

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("Fig5F_2.5kb_Bins_Boxplot_endogenous_rescue_lines.pdf"),
       plot = pBoxPlot_CGI_quant, device = cairo_pdf, units = "cm",
       width = 7.5, height = 5)

Combine up and down flanking regions into facet

Code
CGI_coloured_strip <-
  strip_themed(background_x = elem_list_rect(fill = c(
    '#A2234C', '#203059', '#72817F')),
  text_x = elem_list_text(colour = c('white')))

strip_CGI_conversion <- c(
  `Targeted CGI` = "Targeted CGI",
  `Flanking region` = "Flanking",
  `Rest of the genome` = "Rest of genome"
)

Reshape data

Code
samples_batch1 <- c('WT', 'dex4', 'CSex4', 'KO')
samples_batch2 <- c('WT2', 'KOrL4', 'KOrL6', 'KOrS1', 'KOrS3', 'KOrL3D2', 'KOrS3D2')

rbind(endogenous_bins, rescue_bins) |>
  subset(! Sample %in% c('KOrL6', 'KOrS3') ) |>
  mutate(type3 = type2) |>
  mutate(type3 = case_when(type3 == 'PcG targeted CGI - 1' ~ 'Flanking region',
                           type3 == 'PcG targeted CGI + 1' ~ 'Flanking region',
                           type3 == 'PcG targeted CGI' ~ 'Targeted CGI',
                           type3 == 'Rest of the genome' ~ 'Rest of the genome') ) |>
  mutate(type3 = factor(type3, levels = c('Targeted CGI', 'Flanking region', 'Rest of the genome'))) |>
  mutate(batch = case_when(Sample %in% samples_batch1 ~ 'Batch1',
                           Sample %in% samples_batch2 ~ 'Batch2')
         ) -> combined

Plot figure 5F final.

Code
ggplot(combined) +
  aes(x = Sample, y = log10_H3K27me3, fill = Sample) +
  facet_wrap2(~batch+type3, nrow = 1, scales = 'free_x', strip = CGI_coloured_strip, 
              labeller = labeller(type3 = strip_CGI_conversion ) ) +
  geom_boxplot(outlier.shape = NA, linewidth = 0.2, show.legend = F) +
  scale_x_discrete(labels = c('WT' = 'WT#1', 'WT2' = 'WT#2',
                              'dex4' = '∆ex4', 'KOrL4' = 'KO+L', 
                              'KOrS1' = 'KO+S', 'KOrL3D2' = 'KO+L-3D',
                              'KOrS3D2' = 'KO+S-3D') ) +
  scale_y_continuous(expand = expansion(mult = 0, add = c(0.2, 0.2) ) ) +
  coord_cartesian(ylim = c(0, 3.5), clip = 'on') +
  scale_fill_manual(values = c('WT' = "#377AA3", 'WT2' = "#377AA3", 'CSex4' = "#B65120",
                               'dex4' = "#F7CB48",'KO' = "#728189",
                               "KOrL4" = "mediumpurple2", "KOrS1" = "darkorange1",
                               "KOrL3D2" = "#74A57F", "KOrS3D2" = "#E8B4BC") ) +
  labs(y =  expression("Spike-in normalized H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
  boxplot_th -> pBoxPlot_CGI_quant_3facets
pBoxPlot_CGI_quant_3facets

Save to pdf.

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("Fig5F_2.5kb_Bins_Boxplot_endogenous_rescue_lines_3facets.pdf"),
       plot = pBoxPlot_CGI_quant_3facets, device = cairo_pdf, units = "cm",
       width = 11.0, height = 5)

Split the 2 batches of ChIP-seq in one main and one supplementary figure.

Code
subset(combined, batch == 'Batch1') |>
ggplot() +
  aes(x = Sample, y = log10_H3K27me3, fill = Sample) +
  facet_wrap2( ~type3, nrow = 1, scales = 'fixed', strip = CGI_coloured_strip, 
              labeller = labeller(type3 = strip_CGI_conversion ) ) +
  geom_boxplot(outlier.shape = NA, linewidth = 0.2, show.legend = F) +
  scale_x_discrete(labels = c('dex4' = '∆ex4') ) +
  scale_y_continuous(expand = expansion(mult = 0, add = c(0.2, 0.2) ) ) +
  coord_cartesian(ylim = c(0, 3.5), clip = 'on') +
  scale_fill_manual(values = c('WT' = "#377AA3", 'CSex4' = "#B65120",
                               'dex4' = "#F7CB48",'KO' = "#728189") ) +
  labs(y =  expression("Spike-in normalized H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
  boxplot_th -> pBoxPlot_CGI_quant_3facets_batch1
pBoxPlot_CGI_quant_3facets_batch1

Save to pdf.

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("Fig5F_2.5kb_Bins_Boxplot_endogenous_lines_3facets_batch1.pdf"),
       plot = pBoxPlot_CGI_quant_3facets_batch1, device = cairo_pdf, units = "cm",
       width = 5.5, height = 5)

Batch 2

Code
subset(combined, batch == 'Batch2') |>
ggplot() +
  aes(x = Sample, y = log10_H3K27me3, fill = Sample) +
  facet_wrap2(~type3, nrow = 1, scales = 'fixed', strip = CGI_coloured_strip, 
              labeller = labeller(type3 = strip_CGI_conversion ) ) +
  geom_boxplot(outlier.shape = NA, linewidth = 0.2, show.legend = F) +
  scale_x_discrete(labels = c('WT2' = 'WT', 'KOrL4' = 'KO+L', 'KOrS1' = 'KO+S',
                              'KOrL3D2' = 'KO+L-3D', 'KOrS3D2' = 'KO+S-3D') ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.02), add = 0) ) +
  coord_cartesian(ylim = c(0, 2.5), clip = 'on') +
  scale_fill_manual(values = c('WT2' = "#377AA3",
                               "KOrL4" = "mediumpurple2", "KOrS1" = "darkorange1",
                               "KOrL3D2" = "#74A57F", "KOrS3D2" = "#E8B4BC") ) +
  labs(y =  expression("Spike-in normalized H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
  boxplot_th -> pBoxPlot_CGI_quant_3facets_batch2
pBoxPlot_CGI_quant_3facets_batch2

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("FigS5H_2.5kb_Bins_Boxplot_rescue_lines_3facets_batch2.pdf"),
       plot = pBoxPlot_CGI_quant_3facets_batch2, device = cairo_pdf, units = "cm",
       width = 5.5, height = 5)

3.4 H3K27me3 signal at PRC2 targeted (or not) CpG Islands

CGI = CpG islands.

Targted = SUZ12 peak was found to overlap the CGI.

Non-targeted = SUZ12 didn’t bind that CGI.

3.4.1 Boxplot spike-in normalised

Import bed files for the boxplots. Here every sample is identified by a number.

Code
# b <- 1
region <- c()
sample <- c()
beds <- list()
for (b in 1:length(bed_paths)) {
  # define region based on letter
  if ( grepl(pattern = "^[1-4]A", x = bed_paths[b] ) ) { region[b] <- "CGI_H3K27me3_genes" } 
  else if (grepl(pattern = "^[1-4]B", x = bed_paths[b] ) ) { region[b] <- "CGI_nonPcG_genes" }
  # define sample based on number
  if ( grepl(pattern = "^1", x = bed_paths[b]) ) { sample[b] <- "WT" } 
  else if (grepl(pattern = "^2", x = bed_paths[b] ) ) { sample[b] <- "∆ex4" }
  else if (grepl(pattern = "^3", x = bed_paths[b] ) ) { sample[b] <- "KO" }
  else if (grepl(pattern = "^4", x = bed_paths[b] ) ) { sample[b] <- "CSex4" }
  
  message("Importing sample: ", sample[b], ", region: ", region[b])
  # read in each bed file and annotate it
  read_delim(file = file.path(round17_meta_dir, bed_paths[b]), delim = "\t", 
             col_names = c('chr', 'start', 'end', 'min', 'average', 'max', 'sum'),
             show_col_types = FALSE) |>
    mutate(Sample = sample[b], Region = region[b]) |>
    group_by(Sample, Region) |>
    mutate(Numbered_region = n() ) |>
    ungroup() -> beds[[b]]
  
  names(beds)[b] <- paste0(sample[b], "_", region[b])
}
Importing sample: WT, region: CGI_H3K27me3_genes
Importing sample: WT, region: CGI_nonPcG_genes
Importing sample: ∆ex4, region: CGI_H3K27me3_genes
Importing sample: ∆ex4, region: CGI_nonPcG_genes
Importing sample: KO, region: CGI_H3K27me3_genes
Importing sample: KO, region: CGI_nonPcG_genes
Importing sample: CSex4, region: CGI_H3K27me3_genes
Importing sample: CSex4, region: CGI_nonPcG_genes

Merge list of dataframes (bed files) into one.

Code
all_beds <- do.call('rbind', beds) |>
  mutate(Sample = factor(Sample, levels = c("WT", "CSex4", "∆ex4", "KO")) ) |>
  mutate(Pretty_Region = case_when(Region == "CGI_H3K27me3_genes" ~ "PRC2 targeted CGI",
                                   Region == "CGI_nonPcG_genes" ~ "Other CGI") ) |>
  mutate(Pretty_Region = paste0(Pretty_Region, " (n = ", Numbered_region, ")" ) ) |>
  mutate(Pretty_Region = factor(Pretty_Region, 
                                levels = c("PRC2 targeted CGI (n = 2665)", 
                                           "Other CGI (n = 9999)") ) )

Set colours for facets strip text

Code
Metaplot_coloured_strip <- strip_themed(background_x = elem_list_rect(fill = c('#A2234C', '#003e1f') ),
                                   text_x = elem_list_text(colour = c('white') ) )

strip_CGI_conversion <- c(
  `PRC2 targeted CGI (n = 2665)` = "SUZ12 Targeted CGI (n = 2665)",
  `Other CGI (n = 9999)` = "Non-targeted CGI (n = 9999)"
)

Plot H3K27me3 signal in the CGI targeted regions used for the meta genes later on as a boxplot.

Code
ggplot(all_beds) +
  aes(x = Sample, y = log(average + 1, base = 10), fill = Sample) +
  facet_wrap2( ~Pretty_Region, strip = Metaplot_coloured_strip,
              labeller = labeller(type = strip_CGI_conversion) ) +
  geom_boxplot(outlier.shape = NA, show.legend = F, linewidth = 0.2) +
  scale_fill_manual(values = c('WT' = "#377AA3", 'CSex4' = "#B65120", 
                               '∆ex4' = "#F7CB48",'KO' = "#728189") ) +
  labs(y = expression("Spike-in norm. average H3K27me3 signal (" ~ log[10] ~ " + 1)" ) ) +
  boxplot_th -> p_metagenes_regions_signal_boxplot
p_metagenes_regions_signal_boxplot

Save to pdf.

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("Metagenes_regions_H3K27me3_signal_Boxplot.pdf"),
       plot = p_metagenes_regions_signal_boxplot, device = cairo_pdf, units = "cm",
       width = 7.3, height = 5)

3.4.2 Metagene profiles spike-in normalised in endogenous cells

Import profiles stored in text files.

First import the files Ivano made for the targeted CGI.

Code
CGI_targeted_K27me3_WT <- read_delim(
  file = file.path(CGI_Dir, "14_MetaCGIplots/7A_GENEplot_5000/GENEprofile_7A_5000.txt"), 
  delim = '\t', show_col_types = FALSE ) |>
  setNames(c("MetaPosition", "K27me3_signal") ) |>
  mutate(type = "PRC2 targeted CGI", Sample = "WT") 

CGI_targeted_K27me3_dex4 <- read_delim(
  file = file.path(CGI_Dir, "14_MetaCGIplots/8A_GENEplot_5000/GENEprofile_8A_5000.txt"),
  delim = '\t', show_col_types = FALSE ) |>
  setNames(c("MetaPosition", "K27me3_signal") ) |>
  mutate(type = "PRC2 targeted CGI", Sample = "dex4") 

CGI_targeted_K27me3_KO <- read_delim(
  file = file.path(CGI_Dir, "14_MetaCGIplots/9A_GENEplot_5000/GENEprofile_9A_5000.txt"),
  delim = '\t', show_col_types = FALSE ) |>
  setNames(c("MetaPosition", "K27me3_signal") ) |>
  mutate(type = "PRC2 targeted CGI", Sample = "KO") 

CGI_targeted_K27me3_CSex4 <- read_delim(
  file = file.path(CGI_Dir, "16_MetaCGIplotsEx4100/E3_GENEplot_5000/GENEprofile_E3_5000.txt"),
  delim = '\t', show_col_types = FALSE ) |>
  setNames(c("MetaPosition", "K27me3_signal") ) |>
  mutate(type = "PRC2 targeted CGI", Sample = "CSex4") 

CGI_targeted_K27me3 <- rbind(CGI_targeted_K27me3_WT, CGI_targeted_K27me3_dex4, CGI_targeted_K27me3_KO, CGI_targeted_K27me3_CSex4) |>
  as_tibble()

Then import the non-targeted CGI.

Code
CGI_other_K27me3_WT <- read_delim(
  file = file.path(CGI_Dir, "14_MetaCGIplots/7B_GENEplot_5000/GENEprofile_7B_5000.txt"),
  delim = '\t', show_col_types = FALSE ) |>
  setNames(c("MetaPosition", "K27me3_signal") ) |>
  mutate(type = "Other CGI", Sample = "WT") 

CGI_other_K27me3_dex4 <- read_delim(
  file = file.path(CGI_Dir, "14_MetaCGIplots/8B_GENEplot_5000/GENEprofile_8B_5000.txt"),
  delim = '\t', show_col_types = FALSE ) |>
  setNames(c("MetaPosition", "K27me3_signal") ) |>
  mutate(type = "Other CGI", Sample = "dex4") 

CGI_other_K27me3_KO <- read_delim(
  file = file.path(CGI_Dir, "14_MetaCGIplots/9B_GENEplot_5000/GENEprofile_9B_5000.txt"),
  delim = '\t', show_col_types = FALSE ) |>
  setNames(c("MetaPosition", "K27me3_signal") ) |>
  mutate(type = "Other CGI", Sample = "KO") 

CGI_other_K27me3_CSex4 <- read_delim(
  file = file.path(CGI_Dir,"16_MetaCGIplotsEx4100/E1_GENEplot_5000/GENEprofile_E1_5000.txt"),
  delim = '\t', show_col_types = FALSE ) |>
  setNames(c("MetaPosition", "K27me3_signal") ) |>
  mutate(type = "Other CGI", Sample = "CSex4") 

CGI_other_K27me3 <- rbind(CGI_other_K27me3_WT, CGI_other_K27me3_dex4, CGI_other_K27me3_KO, CGI_other_K27me3_CSex4) |>
  as_tibble()

Combine data and reshape it for plotting.

Code
Meta_K27me3 <- rbind(CGI_targeted_K27me3, CGI_other_K27me3) |>
  mutate(MetaPosition = MetaPosition - 5000) |>
  mutate(type = factor(type, levels = c("PRC2 targeted CGI", "Other CGI") )) |>
  group_by(Sample, type) |>
  arrange(MetaPosition) |>
  ungroup() |>
  mutate(Sample = factor(Sample, levels = c('WT', 'CSex4', 'dex4', 'KO') ) ) |>
  mutate(MetaPosition = MetaPosition / 1000)  # turn bases in kilo-bases

Rename strip for the figure

Code
strip_CGI_conversion <- c(
  `PRC2 targeted CGI` = "SUZ12 targeted CGI",
  `Other CGI` = "Non-targeted CGI"
)

Since the data contains every single nucleotide signal, I use only 1/4 of the data to plot as this does not affect the trend, but makes the pdf file easier to work with. This is done by using sample_frac(size = 0.25).

Code
Meta_K27me3 |>
  sample_frac(size = 0.25) |> 
ggplot() +
  aes(x = MetaPosition, y = K27me3_signal, colour = Sample) +
  facet_wrap2( ~type, strip = Metaplot_coloured_strip,
              labeller = labeller(type = strip_CGI_conversion) ) +
  geom_line(linewidth = 0.5) +
  scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ), 
                     breaks = c(-10, -5, 0, 5, 10) ) +
  scale_y_continuous(expand = expansion(mult = 0.025, add = 0 ) ) +
  scale_colour_manual(values = c('WT' = "#377AA3", 'CSex4' = "#B65120",
                                 'dex4' = "#F7CB48",'KO' = "#728189"),
                      labels = c('dex4' = '∆ex4') ) +
  # coord_cartesian(ylim = c(0, NA), clip = 'off' ) +
  labs(x = 'Distance to CGI center (kb)', 
       y = expression("Spike-in normalised H3K27me3 signal (" ~ log[10] ~ ")" )) +
  profile_th -> pMetaPlot_CGI
pMetaPlot_CGI

Save to pdf.

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("Fig5G_Targeted_CGI_Metagene_Profiles_Endogenous_Lines.pdf"),
       plot = pMetaPlot_CGI, device = cairo_pdf, units = "cm",
       width = 7.3, height = 4.9)

3.4.3 Metagene profiles spike-in normalised in rescues cell lines

Metagene profile spike-in normalised.

Import the files Enrique made for the targeted CGI for the MTF2 ChIP-seq.

Code
trgtd_K27me3_WT1 <- read_prfl(path_dir = round23_metagene_dir,
                              txt_name = "M1A_GENEplot_5000/GENEprofile_M1A_5000.txt",
                              epitope = "H3K27me3", geneset_type = "PRC2 targeted CGI", 
                              sample_name = "WT1")

trgtd_K27me3_WT2 <- read_prfl(path_dir = round23_metagene_dir,
                              txt_name = "M2A_GENEplot_5000/GENEprofile_M2A_5000.txt",
                              epitope = "H3K27me3", geneset_type = "PRC2 targeted CGI", 
                              sample_name = "WT2")

trgtd_K27me3_L1 <- read_prfl(path_dir = round23_metagene_dir,
                             txt_name = "M5A_GENEplot_5000/GENEprofile_M5A_5000.txt",
                             epitope = "H3K27me3", geneset_type = "PRC2 targeted CGI", 
                             sample_name = "KOrL1")

trgtd_K27me3_L2 <- read_prfl(path_dir = round23_metagene_dir,
                             txt_name = "M6A_GENEplot_5000/GENEprofile_M6A_5000.txt",
                             epitope = "H3K27me3", geneset_type = "PRC2 targeted CGI", 
                             sample_name = "KOrL2")

trgtd_K27me3_S1 <- read_prfl(path_dir = round23_metagene_dir,
                             txt_name = "M3A_GENEplot_5000/GENEprofile_M3A_5000.txt",
                             epitope = "H3K27me3", geneset_type = "PRC2 targeted CGI", 
                             sample_name = "KOrS1")

trgtd_K27me3_S2 <- read_prfl(path_dir = round23_metagene_dir,
                             txt_name = "M4A_GENEplot_5000/GENEprofile_M4A_5000.txt",
                             epitope = "H3K27me3", geneset_type = "PRC2 targeted CGI", 
                             sample_name = "KOrS2")

trgtd_K27me3_L3D <- read_prfl(path_dir = round23_metagene_dir,
                             txt_name = "31A_GENEplot_5000/GENEprofile_31A_5000.txt",
                             epitope = "H3K27me3", geneset_type = "PRC2 targeted CGI", 
                             sample_name = "KOrL3D1")

trgtd_K27me3_S3D <- read_prfl(path_dir = round23_metagene_dir,
                             txt_name = "32A_GENEplot_5000/GENEprofile_32A_5000.txt",
                             epitope = "H3K27me3", geneset_type = "PRC2 targeted CGI", 
                             sample_name = "KOrS3D1")

CGI_targeted_K27me3 <- rbind(trgtd_K27me3_WT1, trgtd_K27me3_WT2,
                             trgtd_K27me3_L1, trgtd_K27me3_L2,
                             trgtd_K27me3_S1, trgtd_K27me3_S2,
                             trgtd_K27me3_L3D, trgtd_K27me3_S3D) |>
  as_tibble()

Import rescues signal in not targeted CGIs.

Code
nontrgtd_K27me3_WT1 <- read_prfl(path_dir = round23_metagene_dir,
                                txt_name = "M1B_GENEplot_5000/GENEprofile_M1B_5000.txt",
                                  epitope = "H3K27me3", geneset_type = "Other CGI", 
                              sample_name = "WT1")

nontrgtd_K27me3_WT2 <- read_prfl(path_dir = round23_metagene_dir,
                                txt_name = "M2B_GENEplot_5000/GENEprofile_M2B_5000.txt",
                                epitope = "H3K27me3", geneset_type = "Other CGI", 
                                sample_name = "WT2")

nontrgtd_K27me3_L1 <- read_prfl(path_dir = round23_metagene_dir,
                               txt_name = "M5B_GENEplot_5000/GENEprofile_M5B_5000.txt",
                               epitope = "H3K27me3", geneset_type = "Other CGI", 
                               sample_name = "KOrL1")

nontrgtd_K27me3_L2 <- read_prfl(path_dir = round23_metagene_dir,
                               txt_name = "M6B_GENEplot_5000/GENEprofile_M6B_5000.txt",
                               epitope = "H3K27me3", geneset_type = "Other CGI", 
                               sample_name = "KOrL2")

nontrgtd_K27me3_S1 <- read_prfl(path_dir = round23_metagene_dir,
                                txt_name = "M3B_GENEplot_5000/GENEprofile_M3B_5000.txt",
                                epitope = "H3K27me3", geneset_type = "Other CGI", 
                                sample_name = "KOrS1")

nontrgtd_K27me3_S2 <- read_prfl(path_dir = round23_metagene_dir,
                                txt_name = "M4B_GENEplot_5000/GENEprofile_M4B_5000.txt",
                                epitope = "H3K27me3", geneset_type = "Other CGI", 
                                sample_name = "KOrS2")

nontrgtd_K27me3_L3D <- read_prfl(path_dir = round23_metagene_dir,
                                 txt_name = "31B_GENEplot_5000/GENEprofile_31B_5000.txt",
                                 epitope = "H3K27me3", geneset_type = "Other CGI", 
                                 sample_name = "KOrL3D1")

nontrgtd_K27me3_S3D <- read_prfl(path_dir = round23_metagene_dir,
                                 txt_name = "32B_GENEplot_5000/GENEprofile_32B_5000.txt",
                                 epitope = "H3K27me3", geneset_type = "Other CGI", 
                                 sample_name = "KOrS3D1")

CGI_other_K27me3 <- rbind(nontrgtd_K27me3_WT1, nontrgtd_K27me3_WT2,
                          nontrgtd_K27me3_L1, nontrgtd_K27me3_L2,
                          nontrgtd_K27me3_S1, nontrgtd_K27me3_S2,
                          nontrgtd_K27me3_L3D, nontrgtd_K27me3_S3D) |>
  as_tibble()

Combine data and reshape it for plotting.

Code
Meta_K27me3 <- rbind(CGI_targeted_K27me3, CGI_other_K27me3) |>
  mutate(MetaPosition = MetaPosition - 5000) |>
  mutate(Genotype = str_remove(pattern = "[a1-z9]$", string = Sample)) |> # remove last number (i.e. replicate)
  mutate(type = factor(type, levels = c("PRC2 targeted CGI", "Other CGI") )) |>
  group_by(Genotype, type, MetaPosition) |>
  mutate(Average_Signal = mean(H3K27me3_signal, na.rm = T) )|>
  arrange(MetaPosition) |>
  ungroup() |>
  mutate(Genotype = str_replace(pattern = 'r', replacement = '\\+', Genotype)) |>
  mutate(Genotype = factor(Genotype, levels = rev(c('WT', 'KO+L', 'KO+S', 'KO+L3D', 'KO+S3D')) ) ) |>
  mutate(Sample = factor(Sample, 
                         levels = c('WT1', 'WT2', 'KOrL1', 'KOrL2', 'KOrS1', 'KOrS2', 'KOrL3D1', 'KOrS3D1'))) |>
  mutate(MetaPosition = MetaPosition / 1000) |> # turn bases in kilo-bases
# subset a small a fraction of the total.
  select(MetaPosition, type, Genotype, Average_Signal) |> 
  unique() |> sample_frac(size = 0.25) 

Set colour palettes and plot

Code
genotype_palette <- c('WT' = "#377AA3", 'KO+L' = "mediumpurple2",
                      'KO+S' = "darkorange1", 'KO+L3D'  = "#74A57F",
                      'KO+S3D'= "#E8B4BC")

ggplot(Meta_K27me3) +
  aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
  facet_wrap2( ~type, strip = Metaplot_coloured_strip,
              labeller = labeller(type = strip_CGI_conversion) ) +
  geom_line(linewidth = 0.5) +
  scale_x_continuous(expand = expansion(mult = 0, add = 0.1), 
                     breaks = c(-10, -5, 0, 5, 10) ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.025), add = 0 ) ) +
  scale_colour_manual(values = genotype_palette,
                      guide = guide_legend(reverse = TRUE) ) +
  coord_cartesian(ylim = c(0, NA), clip = 'off' ) +
  labs(x = 'Distance to CGI center (kb)', 
       y = 'Spike-in normalised H3K27me3 signal') +
  profile_th -> pMetaPlot_CGI_H3K27me3_Rescues
pMetaPlot_CGI_H3K27me3_Rescues

Save to pdf.

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("FigS5I_H3K27me3_Targeted_CGI_Metagene_Profiles_Rescue_Lines.pdf"),
       plot = pMetaPlot_CGI_H3K27me3_Rescues, device = cairo_pdf, units = "cm",
       width = 7.3, height = 4.7)

3.5 MTF2 signal at PRC2 targeted (or not) CpG Islands

MTF2 is a component of the PRC2.1 complex.

Metagene profile spike-in normalised.

Import the files Enrique made for the targeted CGI for the MTF2 ChIP-seq.

Code
trgtd_MTF2_WT1 <- read_prfl(path_dir = round21_meta_dir,
                            txt_name = "M1A_GENEplot_5000/GENEprofile_M1A_5000.txt",
                            epitope = "MTF2", geneset_type = "PRC2 targeted CGI", 
                            sample_name = "WT1")

trgtd_MTF2_WT2 <- read_prfl(path_dir = round21_meta_dir,
                            txt_name = "M2A_GENEplot_5000/GENEprofile_M2A_5000.txt",
                            epitope = "MTF2", geneset_type = "PRC2 targeted CGI", 
                            sample_name = "WT2")

trgtd_MTF2_CSex4a <- read_prfl(path_dir = round21_meta_dir,
                               txt_name = "M3A_GENEplot_5000/GENEprofile_M3A_5000.txt",
                               epitope = "MTF2", geneset_type = "PRC2 targeted CGI", 
                               sample_name = "CSex4a")

trgtd_MTF2_CSex4b <- read_prfl(path_dir = round21_meta_dir,
                               txt_name = "M4A_GENEplot_5000/GENEprofile_M4A_5000.txt",
                               epitope = "MTF2", geneset_type = "PRC2 targeted CGI", 
                               sample_name = "CSex4b")

trgtd_MTF2_dex4a <- read_prfl(path_dir = round21_meta_dir,
                              txt_name = "M5A_GENEplot_5000/GENEprofile_M5A_5000.txt",
                              epitope = "MTF2", geneset_type = "PRC2 targeted CGI", 
                              sample_name = "∆ex4a")

trgtd_MTF2_dex4b <- read_prfl(path_dir = round21_meta_dir,
                              txt_name = "M6A_GENEplot_5000/GENEprofile_M6A_5000.txt",
                              epitope = "MTF2", geneset_type = "PRC2 targeted CGI", 
                              sample_name = "∆ex4b")

CGI_targeted_MTF2 <- rbind(trgtd_MTF2_WT1, trgtd_MTF2_WT2, 
                           trgtd_MTF2_CSex4a, trgtd_MTF2_CSex4b,
                           trgtd_MTF2_dex4a, trgtd_MTF2_dex4b) |>
  as_tibble()

Import MTF2 signal at non-targeted CGI.

Code
nontrgtd_MTF2_WT1 <- read_prfl(path_dir = round21_meta_dir,
                               txt_name = "M1B_GENEplot_5000/GENEprofile_M1B_5000.txt",
                               epitope = "MTF2", geneset_type = "Other CGI", 
                               sample_name = "WT1")

nontrgtd_MTF2_WT2 <- read_prfl(path_dir = round21_meta_dir,
                               txt_name = "M2B_GENEplot_5000/GENEprofile_M2B_5000.txt",
                               epitope = "MTF2", geneset_type = "Other CGI", 
                               sample_name = "WT2")

nontrgtd_MTF2_CSex4a <- read_prfl(path_dir = round21_meta_dir,
                                  txt_name = "M3B_GENEplot_5000/GENEprofile_M3B_5000.txt",
                                  epitope = "MTF2", geneset_type = "Other CGI", 
                                  sample_name = "CSex4a")

nontrgtd_MTF2_CSex4b <- read_prfl(path_dir = round21_meta_dir,
                                  txt_name = "M4B_GENEplot_5000/GENEprofile_M4B_5000.txt",
                                  epitope = "MTF2", geneset_type = "Other CGI", 
                                  sample_name = "CSex4b")

nontrgtd_MTF2_dex4a <- read_prfl(path_dir = round21_meta_dir,
                                 txt_name = "M5B_GENEplot_5000/GENEprofile_M5B_5000.txt",
                                 epitope = "MTF2", geneset_type = "Other CGI", 
                                 sample_name = "∆ex4a")

nontrgtd_MTF2_dex4b <- read_prfl(path_dir = round21_meta_dir,
                                 txt_name = "M6B_GENEplot_5000/GENEprofile_M6B_5000.txt",
                                 epitope = "MTF2", geneset_type = "Other CGI", 
                                 sample_name = "∆ex4b")

CGI_other_MTF2 <- rbind(nontrgtd_MTF2_WT1, nontrgtd_MTF2_WT2, 
                        nontrgtd_MTF2_CSex4a, nontrgtd_MTF2_CSex4b,
                        nontrgtd_MTF2_dex4a, nontrgtd_MTF2_dex4b) |>
  as_tibble()

Combine data and reshape it for plotting. Since the data contains every single nucleotide signal, I use only 1/4 of the data to plot as this does not affect the trend, but makes the pdf file easier to work with. This is done by using sample_frac(size = 0.25).

Code
Meta_MTF2 <- rbind(CGI_targeted_MTF2, CGI_other_MTF2) |>
  mutate(MetaPosition = MetaPosition - 5000) |>
  mutate(Genotype = str_remove(pattern = "[a1-z9]$", string = Sample)) |>
  mutate(type = factor(type, levels = c("PRC2 targeted CGI", "Other CGI") )) |>
  group_by(Genotype, type, MetaPosition) |>
  mutate(Average_Signal = mean(MTF2_signal, na.rm = T) )|>
  arrange(MetaPosition) |>
  ungroup() |>
  mutate(Genotype = factor(Genotype, levels = c('WT', 'CSex4', '∆ex4') ) ) |>
  mutate(Sample = factor(Sample, levels = c('WT1', 'WT2', 'CSex4a', 'CSex4b', '∆ex4a', '∆ex4b') ) ) |>
  mutate(MetaPosition = MetaPosition / 1000) |> # turn bases in kilo-bases
  # subset a small a fraction of the total.
  select(MetaPosition, type, Genotype, Average_Signal) |> 
  unique() |> sample_frac(size = 0.25) 

Set colour palettes

Code
samples_palette <- c('WT1' = "#377AA3", 'WT2' = "#377AA3",
                     'CSex4a' = "#B65120", 'CSex4b' = "#B65120",
                     '∆ex4a' = "#F7CB48", '∆ex4b' = "#F7CB48")

genotype_palette <- c('WT' = "#377AA3", 'CSex4' = "#B65120", '∆ex4' = "#F7CB48")

Plot MTF2 profile

Code
ggplot(Meta_MTF2) +
  aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
  facet_wrap2( ~type, strip = Metaplot_coloured_strip,
              labeller = labeller(type = strip_CGI_conversion) ) +
  geom_line(linewidth = 0.5) +
  scale_x_continuous(expand = expansion(mult = 0, add = 0.1), 
                     breaks = c(-10, -5, 0, 5, 10) ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.025), add = 0 ) ) +
  scale_colour_manual(values = genotype_palette ) +
  coord_cartesian(ylim = c(0, NA), clip = 'on') +
  labs(x = 'Distance to CGI center (kb)', 
       y = 'Spike-in normalised MTF2 signal') +
  profile_th -> pMetaPlot_CGI_MTF2
pMetaPlot_CGI_MTF2

Save to pdf.

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("Fig5I_MTF2_Metagene_Profiles.pdf"),
       plot = pMetaPlot_CGI_MTF2, device = cairo_pdf, units = "cm",
       width = 7.3, height = 4.9)

Save for main figure.

Code
Meta_MTF2$Epitope <- 'MTF2'

Meta_MTF2 |>
  subset(type == 'PRC2 targeted CGI') |>
  ggplot() +
  aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
  facet_wrap2( ~Epitope) +
  geom_line(linewidth = 0.5) +
  scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ), 
                     breaks = c(-10, -5, 0, 5, 10) ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05), add = 0 ), n.breaks = 5 ) +
  scale_colour_manual(values = genotype_palette ) +
  coord_cartesian(ylim = c(0, NA), xlim = c(-10, 10), clip = 'on') +
  labs(x = 'Distance to CGI center (kb)', 
       y = 'Spike-in normalised signal at SUZ12 targeted CGI') +
  profile_th +
  theme(legend.position = c(0.85, 0.87)) -> pMetaPlot_MTF2
pMetaPlot_MTF2

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("Fig5I_MTF2_Targeted_CGI_Metagene_Profiles.pdf"),
       plot = pMetaPlot_MTF2, device = cairo_pdf, units = "cm",
       width = 4, height = 4.9)

3.6 JARID2 signal at PRC2 targeted (or not) CpG Islands

Metagene profile of spike-in normalised JARID2 ChIP-seq.

Code
trgtd_JARID2_WT1 <- read_prfl(path_dir = round21_meta_dir,
                                 txt_name = "J1A_GENEplot_5000/GENEprofile_J1A_5000.txt",
                                 epitope = "JARID2", geneset_type = "PRC2 targeted CGI", 
                                 sample_name = "WT1")

trgtd_JARID2_WT2 <- read_prfl(path_dir = round21_meta_dir,
                                   txt_name = "J2A_GENEplot_5000/GENEprofile_J2A_5000.txt",
                                   epitope = "JARID2", geneset_type = "PRC2 targeted CGI", 
                                   sample_name = "WT2")

trgtd_JARID2_CSex4a <- read_prfl(path_dir = round21_meta_dir,
                               txt_name = "J3A_GENEplot_5000/GENEprofile_J3A_5000.txt",
                               epitope = "JARID2", geneset_type = "PRC2 targeted CGI", 
                               sample_name = "CSex4a")

trgtd_JARID2_CSex4b <- read_prfl(path_dir = round21_meta_dir,
                               txt_name = "J4A_GENEplot_5000/GENEprofile_J4A_5000.txt",
                               epitope = "JARID2", geneset_type = "PRC2 targeted CGI", 
                               sample_name = "CSex4b")

trgtd_JARID2_dex4a <- read_prfl(path_dir = round21_meta_dir,
                              txt_name = "J5A_GENEplot_5000/GENEprofile_J5A_5000.txt",
                              epitope = "JARID2", geneset_type = "PRC2 targeted CGI", 
                              sample_name = "∆ex4a")

trgtd_JARID2_dex4b <- read_prfl(path_dir = round21_meta_dir,
                              txt_name = "J6A_GENEplot_5000/GENEprofile_J6A_5000.txt",
                              epitope = "JARID2", geneset_type = "PRC2 targeted CGI", 
                              sample_name = "∆ex4b")

CGI_targeted_JARID2 <- rbind(trgtd_JARID2_WT1, trgtd_JARID2_WT2, 
                             trgtd_JARID2_CSex4a, trgtd_JARID2_CSex4b,
                             trgtd_JARID2_dex4a, trgtd_JARID2_dex4b) |>
  as_tibble()

Import JARID2 signal at non-targeted CGI.

Code
nontrgtd_JARID2_WT1 <- read_prfl(path_dir = round21_meta_dir,
                                   txt_name = "J1B_GENEplot_5000/GENEprofile_J1B_5000.txt",
                                   epitope = "JARID2", geneset_type = "Other CGI", 
                                   sample_name = "WT1")

nontrgtd_JARID2_WT2 <- read_prfl(path_dir = round21_meta_dir,
                                   txt_name = "J2B_GENEplot_5000/GENEprofile_J2B_5000.txt",
                                   epitope = "JARID2", geneset_type = "Other CGI", 
                                   sample_name = "WT2")

nontrgtd_JARID2_CSex4a <- read_prfl(path_dir = round21_meta_dir,
                                   txt_name = "J3B_GENEplot_5000/GENEprofile_J3B_5000.txt",
                                   epitope = "JARID2", geneset_type = "Other CGI", 
                                   sample_name = "CSex4a")

nontrgtd_JARID2_CSex4b <- read_prfl(path_dir = round21_meta_dir,
                               txt_name = "J4B_GENEplot_5000/GENEprofile_J4B_5000.txt",
                               epitope = "JARID2", geneset_type = "Other CGI", 
                               sample_name = "CSex4b")

nontrgtd_JARID2_dex4a <- read_prfl(path_dir = round21_meta_dir,
                              txt_name = "J5B_GENEplot_5000/GENEprofile_J5B_5000.txt",
                              epitope = "JARID2", geneset_type = "Other CGI", 
                              sample_name = "∆ex4a")

nontrgtd_JARID2_dex4b <- read_prfl(path_dir = round21_meta_dir,
                              txt_name = "J6B_GENEplot_5000/GENEprofile_J6B_5000.txt",
                              epitope = "JARID2", geneset_type = "Other CGI", 
                              sample_name = "∆ex4b")

CGI_other_JARID2 <- rbind(nontrgtd_JARID2_WT1, nontrgtd_JARID2_WT2, 
                        nontrgtd_JARID2_CSex4a, nontrgtd_JARID2_CSex4b,
                        nontrgtd_JARID2_dex4a, nontrgtd_JARID2_dex4b) |>
  as_tibble()

Combine data and reshape it for plotting.

Code
Meta_JARID2 <- rbind(CGI_targeted_JARID2, CGI_other_JARID2) |>
  mutate(MetaPosition = MetaPosition - 5000) |>
  mutate(Genotype = str_remove(pattern = "[a1-z9]$", string = Sample)) |>
  mutate(type = factor(type, levels = c("PRC2 targeted CGI", "Other CGI") )) |>
  group_by(Genotype, type, MetaPosition) |>
  mutate(Average_Signal = mean(JARID2_signal, na.rm = T) )|>
  arrange(MetaPosition) |>
  ungroup() |>
  mutate(Genotype = factor(Genotype, levels = c('WT', 'CSex4', '∆ex4') ) ) |>
  mutate(Sample = factor(Sample, levels = c('WT1', 'WT2', 'CSex4a', 'CSex4b', '∆ex4a', '∆ex4b') ) ) |>
  mutate(MetaPosition = MetaPosition / 1000) |> # turn bases in kilo-bases
# subset a small a fraction of the total.
  select(MetaPosition, type, Genotype, Average_Signal) |> 
  unique() |> sample_frac(size = 0.25) 

Plot JARID2 profile.

Code
ggplot(Meta_JARID2) +
  aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
  facet_wrap2( ~type, strip = Metaplot_coloured_strip,
              labeller = labeller(type = strip_CGI_conversion) ) +
  geom_line(linewidth = 0.5) +
  scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ), 
                     breaks = c(-10, -5, 0, 5, 10) ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.025), add = 0 ) ) +
  scale_colour_manual(values = genotype_palette ) +
  coord_cartesian(ylim = c(0, NA), clip = 'on') +
  labs(x = 'Distance to CGI center (kb)', 
       y = 'Spike-in normalised JARID2 signal') +
  profile_th -> pMetaPlot_CGI_JARID2
pMetaPlot_CGI_JARID2

Save to pdf.

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("Fig5H_JARID2_Metagene_Profiles.pdf"),
       plot = pMetaPlot_CGI_JARID2, device = cairo_pdf, units = "cm",
       width = 7.3, height = 4.9)

Save for main figure.

Code
Meta_JARID2$Epitope <- 'JARID2'

Meta_JARID2 |>
  subset(type == 'PRC2 targeted CGI') |>
  ggplot() +
  aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
  facet_wrap2( ~Epitope) +
  geom_line(linewidth = 0.5) +
  scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ), 
                     breaks = c(-10, -5, 0, 5, 10) ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05), add = 0 ), n.breaks = 5 ) +
  scale_colour_manual(values = genotype_palette ) +
  coord_cartesian(ylim = c(0, NA), xlim = c(-10, 10),clip = 'on') +
  labs(x = 'Distance to CGI center (kb)', 
       y = 'Spike-in normalised signal at SUZ12 targeted CGI') +
  profile_th +
  theme(legend.position = c(0.85, 0.87)) -> pMetaPlot_JARID2
pMetaPlot_JARID2

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("Fig5H_JARID2_Targeted_CGI_Metagene_Profiles.pdf"),
       plot = pMetaPlot_JARID2, device = cairo_pdf, units = "cm",
       width = 4, height = 4.9)

3.7 AEBP2 signal at PRC2 targeted (or not) CpG Islands

Import AEBP2 targeted CGI signal not spike-in normalised for metagene profile

Code
trgtd_AEBP2_WT1 <- read_prfl(path_dir = round21_meta_dir,
                             txt_name = "nA1A_GENEplot_5000/GENEprofile_nA1A_5000.txt",
                             epitope = "AEBP2", geneset_type = "PRC2 targeted CGI", 
                             sample_name = "WT1")

trgtd_AEBP2_WT2 <- read_prfl(path_dir = round21_meta_dir,
                             txt_name = "nA2A_GENEplot_5000/GENEprofile_nA2A_5000.txt",
                             epitope = "AEBP2", geneset_type = "PRC2 targeted CGI", 
                             sample_name = "WT2")

trgtd_AEBP2_CSex4a <- read_prfl(path_dir = round21_meta_dir,
                                txt_name = "nA3A_GENEplot_5000/GENEprofile_nA3A_5000.txt",
                                epitope = "AEBP2", geneset_type = "PRC2 targeted CGI", 
                                sample_name = "CSex4a")

trgtd_AEBP2_CSex4b <- read_prfl(path_dir = round21_meta_dir,
                                txt_name = "nA4A_GENEplot_5000/GENEprofile_nA4A_5000.txt",
                                epitope = "AEBP2", geneset_type = "PRC2 targeted CGI", 
                                sample_name = "CSex4b")

trgtd_AEBP2_dex4a <- read_prfl(path_dir = round21_meta_dir,
                               txt_name = "nA5A_GENEplot_5000/GENEprofile_nA5A_5000.txt",
                               epitope = "AEBP2", geneset_type = "PRC2 targeted CGI", 
                               sample_name = "∆ex4a")

trgtd_AEBP2_dex4b <- read_prfl(path_dir = round21_meta_dir,
                               txt_name = "nA6A_GENEplot_5000/GENEprofile_nA6A_5000.txt",
                               epitope = "AEBP2", geneset_type = "PRC2 targeted CGI", 
                               sample_name = "∆ex4b")

CGI_targeted_AEBP2 <- rbind(trgtd_AEBP2_WT1, trgtd_AEBP2_WT2, 
                            trgtd_AEBP2_CSex4a, trgtd_AEBP2_CSex4b,
                            trgtd_AEBP2_dex4a, trgtd_AEBP2_dex4b) |>
  as_tibble()

Import AEBP2 signal at non-targeted CGI.

Code
nontrgtd_AEBP2_WT1 <- read_prfl(path_dir = round21_meta_dir,
                                txt_name = "nA1B_GENEplot_5000/GENEprofile_nA1B_5000.txt",
                                epitope = "AEBP2", geneset_type = "Other CGI", 
                                sample_name = "WT1")

nontrgtd_AEBP2_WT2 <- read_prfl(path_dir = round21_meta_dir,
                                txt_name = "nA2B_GENEplot_5000/GENEprofile_nA2B_5000.txt",
                                epitope = "AEBP2", geneset_type = "Other CGI", 
                                sample_name = "WT2")

nontrgtd_AEBP2_CSex4a <- read_prfl(path_dir = round21_meta_dir,
                                   txt_name = "nA3B_GENEplot_5000/GENEprofile_nA3B_5000.txt",
                                   epitope = "AEBP2", geneset_type = "Other CGI", 
                                   sample_name = "CSex4a")

nontrgtd_AEBP2_CSex4b <- read_prfl(path_dir = round21_meta_dir,
                                   txt_name = "nA4B_GENEplot_5000/GENEprofile_nA4B_5000.txt",
                                   epitope = "AEBP2", geneset_type = "Other CGI", 
                                   sample_name = "CSex4b")

nontrgtd_AEBP2_dex4a <- read_prfl(path_dir = round21_meta_dir,
                                  txt_name = "nA5B_GENEplot_5000/GENEprofile_nA5B_5000.txt",
                                  epitope = "AEBP2", geneset_type = "Other CGI", 
                                  sample_name = "∆ex4a")

nontrgtd_AEBP2_dex4b <- read_prfl(path_dir = round21_meta_dir,
                                  txt_name = "nA6B_GENEplot_5000/GENEprofile_nA6B_5000.txt",
                                  epitope = "AEBP2", geneset_type = "Other CGI", 
                                  sample_name = "∆ex4b")

CGI_other_AEBP2 <- rbind(nontrgtd_AEBP2_WT1, nontrgtd_AEBP2_WT2, 
                         nontrgtd_AEBP2_CSex4a, nontrgtd_AEBP2_CSex4b,
                         nontrgtd_AEBP2_dex4a, nontrgtd_AEBP2_dex4b) |>
  as_tibble()

Combine data and reshape it for plotting.

Code
Meta_AEBP2 <- rbind(CGI_targeted_AEBP2, CGI_other_AEBP2) |>
  mutate(MetaPosition = MetaPosition - 5000) |>
  mutate(Genotype = str_remove(pattern = "[a1-z9]$", string = Sample)) |>
  mutate(type = factor(type, levels = c("PRC2 targeted CGI", "Other CGI") )) |>
  group_by(Genotype, type, MetaPosition) |>
  mutate(Average_Signal = mean(AEBP2_signal, na.rm = T) )|>
  arrange(MetaPosition) |>
  ungroup() |>
  mutate(Genotype = factor(Genotype, levels = c('WT', 'CSex4', '∆ex4') ) ) |>
  mutate(Sample = factor(Sample, levels = c('WT1', 'WT2', 'CSex4a', 'CSex4b', '∆ex4a', '∆ex4b') ) ) |>
  mutate(MetaPosition = MetaPosition / 1000) |> # turn bases in kilo-bases
  # subset a small a fraction of the total.
  select(MetaPosition, type, Genotype, Average_Signal) |> 
  unique() |> sample_frac(size = 0.25) 

Plot AEBP2 profile

Code
genotype_palette <- c('WT' = "#377AA3", 'CSex4' = "#B65120", '∆ex4' = "#F7CB48")
ggplot(Meta_AEBP2) +
  aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
  facet_wrap2( ~type, strip = Metaplot_coloured_strip,
              labeller = labeller(type = strip_CGI_conversion) ) +
  geom_line(linewidth = 0.5) +
  scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ), 
                     breaks = c(-10, -5, 0, 5, 10) ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05), add = 0 ), n.breaks = 3 ) +
  scale_colour_manual(values = genotype_palette ) +
  coord_cartesian(ylim = c(0.05, 0.15), clip = 'on') +
  labs(x = 'Distance to CGI center (kb)', 
       y = 'AEBP2 ChIP-seq signal') +
  profile_th -> pMetaPlot_CGI_AEBP2
pMetaPlot_CGI_AEBP2

Save to pdf.

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("FigS5J_AEBP2_Metagene_Profiles.pdf"),
       plot = pMetaPlot_CGI_AEBP2, device = cairo_pdf, units = "cm",
       width = 6.7, height = 4.7)

Plot for supplementary figure

Code
Meta_AEBP2$Epitope <- 'AEBP2'

Meta_AEBP2 |>
  subset(type == 'PRC2 targeted CGI') |>
  ggplot() +
  aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
  facet_wrap2( ~Epitope) +
  geom_line(linewidth = 0.5) +
  scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ), 
                     breaks = c(-10, -5, 0, 5, 10) ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05), add = 0 ), n.breaks = 3 ) +
  scale_colour_manual(values = genotype_palette ) +
  coord_cartesian(ylim = c(0.05, 0.15), clip = 'on') +
  labs(x = 'Distance to CGI center (kb)', 
       y = 'ChIP-seq signal at SUZ12 targeted CGI') +
  profile_th +
  theme(legend.position = c(0.85, 0.87)) -> pMetaPlot_AEBP2
pMetaPlot_AEBP2

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("FigS5J_AEBP2_Targeted_CGI_Metagene_Profiles.pdf"),
       plot = pMetaPlot_AEBP2, device = cairo_pdf, units = "cm",
       width = 4, height = 4.9)

3.8 RING1B signal at PRC2 targeted (or not) CpG Islands

Import RING1B targeted CGI signal not spike-in normalised for metagene profile

Code
trgtd_RING1B_WT1 <- read_prfl(path_dir = round21_meta_dir,
                              txt_name = "nR1A_GENEplot_5000/GENEprofile_nR1A_5000.txt",
                              epitope = "RING1B", geneset_type = "PRC2 targeted CGI", 
                              sample_name = "WT1")

trgtd_RING1B_WT2 <- read_prfl(path_dir = round21_meta_dir,
                              txt_name = "nR2A_GENEplot_5000/GENEprofile_nR2A_5000.txt",
                              epitope = "RING1B", geneset_type = "PRC2 targeted CGI", 
                              sample_name = "WT2")

trgtd_RING1B_CSex4a <- read_prfl(path_dir = round21_meta_dir,
                                 txt_name = "nR3A_GENEplot_5000/GENEprofile_nR3A_5000.txt",
                                 epitope = "RING1B", geneset_type = "PRC2 targeted CGI", 
                                 sample_name = "CSex4a")

trgtd_RING1B_CSex4b <- read_prfl(path_dir = round21_meta_dir,
                                 txt_name = "nR4A_GENEplot_5000/GENEprofile_nR4A_5000.txt",
                                 epitope = "RING1B", geneset_type = "PRC2 targeted CGI", 
                                 sample_name = "CSex4b")

trgtd_RING1B_dex4a <- read_prfl(path_dir = round21_meta_dir,
                                txt_name = "nR5A_GENEplot_5000/GENEprofile_nR5A_5000.txt",
                                epitope = "RING1B", geneset_type = "PRC2 targeted CGI", 
                                sample_name = "∆ex4a")

trgtd_RING1B_dex4b <- read_prfl(path_dir = round21_meta_dir,
                                txt_name = "nR6A_GENEplot_5000/GENEprofile_nR6A_5000.txt",
                                epitope = "RING1B", geneset_type = "PRC2 targeted CGI", 
                                sample_name = "∆ex4b")

CGI_targeted_RING1B <- rbind(trgtd_RING1B_WT1, trgtd_RING1B_WT2, 
                             trgtd_RING1B_CSex4a, trgtd_RING1B_CSex4b,
                             trgtd_RING1B_dex4a, trgtd_RING1B_dex4b) |>
  as_tibble()

Import RING1B signal at non-targeted CGI.

Code
nontrgtd_RING1B_WT1 <- read_prfl(path_dir = round21_meta_dir,
                                 txt_name = "nR1B_GENEplot_5000/GENEprofile_nR1B_5000.txt",
                                 epitope = "RING1B", geneset_type = "Other CGI", 
                                 sample_name = "WT1")

nontrgtd_RING1B_WT2 <- read_prfl(path_dir = round21_meta_dir,
                                 txt_name = "nR2B_GENEplot_5000/GENEprofile_nR2B_5000.txt",
                                 epitope = "RING1B", geneset_type = "Other CGI", 
                                 sample_name = "WT2")

nontrgtd_RING1B_CSex4a <- read_prfl(path_dir = round21_meta_dir,
                                    txt_name = "nR3B_GENEplot_5000/GENEprofile_nR3B_5000.txt",
                                    epitope = "RING1B", geneset_type = "Other CGI", 
                                    sample_name = "CSex4a")

nontrgtd_RING1B_CSex4b <- read_prfl(path_dir = round21_meta_dir,
                                    txt_name = "nR4B_GENEplot_5000/GENEprofile_nR4B_5000.txt",
                                    epitope = "RING1B", geneset_type = "Other CGI", 
                                    sample_name = "CSex4b")

nontrgtd_RING1B_dex4a <- read_prfl(path_dir = round21_meta_dir,
                                   txt_name = "nR5B_GENEplot_5000/GENEprofile_nR5B_5000.txt",
                                   epitope = "RING1B", geneset_type = "Other CGI", 
                                   sample_name = "∆ex4a")

nontrgtd_RING1B_dex4b <- read_prfl(path_dir = round21_meta_dir,
                                   txt_name = "nR6B_GENEplot_5000/GENEprofile_nR6B_5000.txt",
                                   epitope = "RING1B", geneset_type = "Other CGI", 
                                   sample_name = "∆ex4b")

CGI_other_RING1B <- rbind(nontrgtd_RING1B_WT1, nontrgtd_RING1B_WT2, 
                         nontrgtd_RING1B_CSex4a, nontrgtd_RING1B_CSex4b,
                         nontrgtd_RING1B_dex4a, nontrgtd_RING1B_dex4b) |>
  as_tibble()

Combine data and reshape it for plotting.

Code
Meta_RING1B <- rbind(CGI_targeted_RING1B, CGI_other_RING1B) |>
  mutate(MetaPosition = MetaPosition - 5000) |>
  mutate(Genotype = str_remove(pattern = "[a1-z9]$", string = Sample)) |>
  mutate(type = factor(type, levels = c("PRC2 targeted CGI", "Other CGI") )) |>
  group_by(Genotype, type, MetaPosition) |>
  mutate(Average_Signal = mean(RING1B_signal, na.rm = T) )|>
  arrange(MetaPosition) |>
  ungroup() |>
  mutate(Genotype = factor(Genotype, levels = c('WT', 'CSex4', '∆ex4') ) ) |>
  mutate(Sample = factor(Sample, levels = c('WT1', 'WT2', 'CSex4a', 'CSex4b', '∆ex4a', '∆ex4b') ) ) |>
  mutate(MetaPosition = MetaPosition / 1000) |> # turn bases in kilo-bases
  # subset a small a fraction of the total.
  select(MetaPosition, type, Genotype, Average_Signal) |> 
  unique() |> sample_frac(size = 0.25) 

Plot RING1B profile

Code
ggplot(Meta_RING1B) +
  aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
  facet_wrap2( ~type, strip = Metaplot_coloured_strip,
              labeller = labeller(type = strip_CGI_conversion) ) +
  geom_line(linewidth = 0.5) +
  scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ), 
                     breaks = c(-10, -5, 0, 5, 10) ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05), add = 0 ) ) +
  scale_colour_manual(values = genotype_palette ) +
  coord_cartesian(ylim = c(0.06, NA), clip = 'on') +
  labs(x = 'Distance to CGI center (kb)', 
       y = 'RING1B ChIP-seq signal') +
  profile_th -> pMetaPlot_CGI_RING1B
pMetaPlot_CGI_RING1B

Save to pdf.

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("Fig5J_RING1B_Metagene_Profiles.pdf"),
       plot = pMetaPlot_CGI_RING1B, device = cairo_pdf, units = "cm",
       width = 7.3, height = 4.9)

Plot for main figure

Code
Meta_RING1B$Epitope <- 'RING1B'

Meta_RING1B |>
  subset(type == 'PRC2 targeted CGI') |>
  ggplot() +
  aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
  facet_wrap2( ~Epitope) +
  geom_line(linewidth = 0.5) +
  scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ), 
                     breaks = c(-10, -5, 0, 5, 10) ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05), add = 0 ), n.breaks = 6 ) +
  scale_colour_manual(values = genotype_palette ) +
  coord_cartesian(ylim = c(0.05, 0.32), xlim = c(-10, 10), clip = 'on') +
  labs(x = 'Distance to CGI center (kb)', 
       y = 'ChIP-seq signal at SUZ12 targeted CGI') +
  profile_th +
  theme(legend.position = c(0.85, 0.87)) -> pMetaPlot_RING1B
pMetaPlot_RING1B

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("Fig5J_RING1B_Targeted_CGI_Metagene_Profiles.pdf"),
       plot = pMetaPlot_RING1B, device = cairo_pdf, units = "cm",
       width = 4, height = 4.9)

3.9 H2Aub signal at PRC2 targeted (or not) CpG Islands

Import H2Aub targeted CGI signal not spike-in normalised for metagene profile

Code
trgtd_H2Aub_WT1 <- read_prfl(path_dir = round21_meta_dir,
                             txt_name = "nU1A_GENEplot_5000/GENEprofile_nU1A_5000.txt",
                             epitope = "H2Aub", geneset_type = "PRC2 targeted CGI", 
                             sample_name = "WT1")

trgtd_H2Aub_WT2 <- read_prfl(path_dir = round21_meta_dir,
                             txt_name = "nU2A_GENEplot_5000/GENEprofile_nU2A_5000.txt",
                             epitope = "H2Aub", geneset_type = "PRC2 targeted CGI", 
                             sample_name = "WT2")

trgtd_H2Aub_CSex4a <- read_prfl(path_dir = round21_meta_dir,
                                txt_name = "nU3A_GENEplot_5000/GENEprofile_nU3A_5000.txt",
                                epitope = "H2Aub", geneset_type = "PRC2 targeted CGI", 
                                sample_name = "CSex4a")

trgtd_H2Aub_CSex4b <- read_prfl(path_dir = round21_meta_dir,
                                txt_name = "nU4A_GENEplot_5000/GENEprofile_nU4A_5000.txt",
                                epitope = "H2Aub", geneset_type = "PRC2 targeted CGI", 
                                sample_name = "CSex4b")

trgtd_H2Aub_dex4a <- read_prfl(path_dir = round21_meta_dir,
                               txt_name = "nU5A_GENEplot_5000/GENEprofile_nU5A_5000.txt",
                               epitope = "H2Aub", geneset_type = "PRC2 targeted CGI", 
                               sample_name = "∆ex4a")

trgtd_H2Aub_dex4b <- read_prfl(path_dir = round21_meta_dir,
                               txt_name = "nU6A_GENEplot_5000/GENEprofile_nU6A_5000.txt",
                               epitope = "H2Aub", geneset_type = "PRC2 targeted CGI", 
                               sample_name = "∆ex4b")

CGI_targeted_H2Aub <- rbind(trgtd_H2Aub_WT1, trgtd_H2Aub_WT2, 
                            trgtd_H2Aub_CSex4a, trgtd_H2Aub_CSex4b,
                            trgtd_H2Aub_dex4a, trgtd_H2Aub_dex4b) |>
  as_tibble()

Import H2Aub signal at non-targeted CGI.

Code
nontrgtd_H2Aub_WT1 <- read_prfl(path_dir = round21_meta_dir,
                                 txt_name = "nU1B_GENEplot_5000/GENEprofile_nU1B_5000.txt",
                                 epitope = "H2Aub", geneset_type = "Other CGI", 
                                 sample_name = "WT1")

nontrgtd_H2Aub_WT2 <- read_prfl(path_dir = round21_meta_dir,
                                 txt_name = "nU2B_GENEplot_5000/GENEprofile_nU2B_5000.txt",
                                 epitope = "H2Aub", geneset_type = "Other CGI", 
                                 sample_name = "WT2")

nontrgtd_H2Aub_CSex4a <- read_prfl(path_dir = round21_meta_dir,
                                    txt_name = "nU3B_GENEplot_5000/GENEprofile_nU3B_5000.txt",
                                    epitope = "H2Aub", geneset_type = "Other CGI", 
                                    sample_name = "CSex4a")

nontrgtd_H2Aub_CSex4b <- read_prfl(path_dir = round21_meta_dir,
                                    txt_name = "nU4B_GENEplot_5000/GENEprofile_nU4B_5000.txt",
                                    epitope = "H2Aub", geneset_type = "Other CGI", 
                                    sample_name = "CSex4b")

nontrgtd_H2Aub_dex4a <- read_prfl(path_dir = round21_meta_dir,
                                   txt_name = "nU5B_GENEplot_5000/GENEprofile_nU5B_5000.txt",
                                   epitope = "H2Aub", geneset_type = "Other CGI", 
                                   sample_name = "∆ex4a")

nontrgtd_H2Aub_dex4b <- read_prfl(path_dir = round21_meta_dir,
                                   txt_name = "nU6B_GENEplot_5000/GENEprofile_nU6B_5000.txt",
                                   epitope = "H2Aub", geneset_type = "Other CGI", 
                                   sample_name = "∆ex4b")

CGI_other_H2Aub <- rbind(nontrgtd_H2Aub_WT1, nontrgtd_H2Aub_WT2, 
                         nontrgtd_H2Aub_CSex4a, nontrgtd_H2Aub_CSex4b,
                         nontrgtd_H2Aub_dex4a, nontrgtd_H2Aub_dex4b) |>
  as_tibble()

Combine data and reshape it for plotting.

Code
Meta_H2Aub <- rbind(CGI_targeted_H2Aub, CGI_other_H2Aub) |>
  mutate(MetaPosition = MetaPosition - 5000) |>
  mutate(Genotype = str_remove(pattern = "[a1-z9]$", string = Sample)) |>
  mutate(type = factor(type, levels = c("PRC2 targeted CGI", "Other CGI") )) |>
  group_by(Genotype, type, MetaPosition) |>
  mutate(Average_Signal = mean(H2Aub_signal, na.rm = T) )|>
  arrange(MetaPosition) |>
  ungroup() |>
  mutate(Genotype = factor(Genotype, levels = c('WT', 'CSex4', '∆ex4') ) ) |>
  mutate(Sample = factor(Sample, levels = c('WT1', 'WT2', 'CSex4a', 'CSex4b', '∆ex4a', '∆ex4b') ) ) |>
  mutate(MetaPosition = MetaPosition / 1000) |> # turn bases in kilo-bases
  # subset a small a fraction of the total.
  select(MetaPosition, type, Genotype, Average_Signal) |> 
  unique() |> sample_frac(size = 0.25) 

Plot H2Aub profile

Code
ggplot(Meta_H2Aub) +
  aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
  facet_wrap2( ~type, strip = Metaplot_coloured_strip,
              labeller = labeller(type = strip_CGI_conversion) ) +
  geom_line(linewidth = 0.5) +
  scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ), 
                     breaks = c(-10, -5, 0, 5, 10) ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05), add = 0 ) ) +
  scale_colour_manual(values = genotype_palette ) +
  coord_cartesian(ylim = c(0.025, NA), clip = 'on') +
  labs(x = 'Distance to CGI center (kb)', 
       y = 'H2AK119ub ChIP-seq signal') +
  profile_th -> pMetaPlot_CGI_H2Aub
pMetaPlot_CGI_H2Aub

Save to pdf.

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("Fig5K_H2AK119ub_Metagene_Profiles.pdf"),
       plot = pMetaPlot_CGI_H2Aub, device = cairo_pdf, units = "cm",
       width = 7.3, height = 4.9)

Plot for main figure

Code
Meta_H2Aub$Epitope <- 'H2AK119ub'

Meta_H2Aub |>
  subset(type == 'PRC2 targeted CGI') |>
  ggplot() +
  aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
  facet_wrap2( ~Epitope) +
  geom_line(linewidth = 0.5) +
  scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ), 
                     breaks = c(-10, -5, 0, 5, 10) ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05), add = 0 ), n.breaks = 6 ) +
  scale_colour_manual(values = genotype_palette ) +
  coord_cartesian(ylim = c(0.05, 0.32), clip = 'on') +
  labs(x = 'Distance to CGI center (kb)', 
       y = 'ChIP-seq signal at SUZ12 targeted CGI') +
  profile_th +
  theme(legend.position = c(0.85, 0.87)) -> pMetaPlot_CGI_H2Aub
pMetaPlot_CGI_H2Aub

Code
ggsave(path = pdf_dir_fig5, 
       filename = paste0("Fig5K_H2AK119ub_Targeted_CGI_Metagene_Profiles.pdf"),
       plot = pMetaPlot_CGI_H2Aub, device = cairo_pdf, units = "cm",
       width = 4, height = 4.9)

4 Supplementary Figure Panels

4.1 Venn diagram SUZ12 peaks

Check the overlap of the genes that contain a peak of SUZ12 in their promoter.

Code
CSex4 <- read.table(file.path(round3_suz12_peaks_dir, "Suz12_100.txt"))$V1
WT_1 <- read.table(file.path(round3_suz12_peaks_dir, "Suz12_WT1.txt"))$V1
WT_2 <- read.table(file.path(round3_suz12_peaks_dir, "Suz12_WT2.txt"))$V1
dEx4_1 <- read.table(file.path(round3_suz12_peaks_dir, "Suz12_KO1.txt"))$V1
dEx4_2 <- read.table(file.path(round3_suz12_peaks_dir, "Suz12_KO2.txt"))$V1

WT <- WT_1[WT_1%in%WT_2]
dEx4 <- dEx4_1[dEx4_1%in%dEx4_2]

message("Num of genes with SUZ12 peaks:\nWT#1:\t", length(WT_1), '\nWT#2:\t', length(WT_2), 
        '\nWT shared: ', length(WT),
        '\n∆ex4#1:\t', length(dEx4_1), '\n∆ex4#2:\t', length(dEx4_2), '\n∆ex4 shared: ', length(dEx4),
        '\nCSex4:\t', length(CSex4))
Num of genes with SUZ12 peaks:
WT#1:   3316
WT#2:   3924
WT shared: 3203
∆ex4#1: 4846
∆ex4#2: 2803
∆ex4 shared: 2773
CSex4:  3446

Save to pdf the peaks overlap as venn diagram

Code
draw.venn( WT,CSex4,dEx4,
          title = "SUZ12 target overlap",
          subtitle = "",
          nrtype = "pct",
          # t_f = "Arial",
          xtitle = "WT", #xt_f = "Arial",
          ytitle = "CSex4", #yt_f = "Arial",
          ztitle = "∆ex4", #zt_f = "Arial",
          x_c = "#377AA3", y_c = "#B65120", z_c = "#F7CB48", 
          #nr_f = "Arial", 
          nr_fb = 1, 
          output = 'pdf', width = 500, height = 500, 
          filename = file.path(pdf_dir_fig5, 'FigS5A_SUZ12_targets_VennDiagram.pdf') )

Now do the same for H3K27me3

Code
CSex4 <- read.table(file.path(round2_h3k27me3_peaks_dir, "H3K27me3_100.txt"))$V1
WT_1 <- read.table(file.path(round2_h3k27me3_peaks_dir, "H3K27me3_WT1.txt"))$V1
WT_2 <- read.table(file.path(round2_h3k27me3_peaks_dir, "H3K27me3_WT2.txt"))$V1
dEx4_1 <- read.table(file.path(round2_h3k27me3_peaks_dir, "H3K27me3_KO1.txt"))$V1
dEx4_2 <- read.table(file.path(round2_h3k27me3_peaks_dir, "H3K27me3_KO2.txt"))$V1

WT <- WT_1[WT_1%in%WT_2]
dEx4 <- dEx4_1[dEx4_1%in%dEx4_2]

message("Num of genes with H3K27me3 peaks:\nWT#1:\t", length(WT_1), '\nWT#2:\t', length(WT_2), 
        '\nWT shared: ', length(WT),
        '\n∆ex4#1:\t', length(dEx4_1), '\n∆ex4#2:\t', length(dEx4_2), '\n∆ex4 shared: ', length(dEx4),
        '\nCSex4:\t', length(CSex4))
Num of genes with H3K27me3 peaks:
WT#1:   5039
WT#2:   5973
WT shared: 4861
∆ex4#1: 7214
∆ex4#2: 5470
∆ex4 shared: 4717
CSex4:  3623

Plot:

Code
draw.venn(WT,CSex4,dEx4,
          title = "H3K27me3 target overlap",
          subtitle = "", nrtype = "pct",
          xtitle = "WT", #xt_f = "Arial",
          ytitle = "CSex4", #yt_f = "Arial",
          ztitle = "∆ex4", #zt_f = "Arial",
          x_c = "#377AA3", y_c = "#B65120", z_c = "#F7CB48", 
          #nr_f = "Arial", 
          nr_fb = 1, 
          output = 'pdf', width = 500, height = 500, 
          filename = file.path(pdf_dir_fig5, 'FigS5A_H3K27me3_targets_VennDiagram.pdf'))

4.2 ChIP-qPCR controls

Code
genotype_palette <- c('WT' = "#377AA3", 'CSex4' = "#B65120", '∆ex4' = "#F7CB48", 'KO' = "#728189")
ChIP_qPCR_dir <- file.path(oneDrive_Dir, '11_ChIP/ZChIP_test2')
qPCR_path <- list.files(ChIP_qPCR_dir, pattern = '210603_ZChIP_test2.xlsx', full.names = T)
Code
WT_samples <- c("WTp", "WT#2")
CSex4_samples <- c("CSex4")
dEx4_samples <- c("∆ex4#2", "∆ex4#3")
KO_samples <- c("KO#1", "KO#2")
Code
data <- read_excel(qPCR_path, sheet = "melt")
# fix old name
data <- data |>
  mutate(Sample = ifelse(Sample == 'WT4C', yes = 'CSex4', no = Sample)) 
data <- data |>
  mutate(Genotype = case_when(Sample %in% WT_samples ~ 'WT',
                              Sample %in% CSex4_samples ~ 'CSex4',
                              Sample %in% dEx4_samples ~ '∆ex4',
                              Sample %in% KO_samples ~ 'KO') ) |>
  mutate(Genotype = factor(Genotype, levels = c('WT', 'CSex4', '∆ex4','KO') ) ) |>
  mutate(Sample = factor(Sample, levels = unique(data$Sample) ) ) |>
  mutate(Gene = factor(Gene, levels = c("Gsc","Hoxd13","Nes","T (Bra)","Actb","Sox2") ) )

Calculate avg for barplot

Code
avg <- data %>% 
  group_by(Gene, Sample, Genotype) %>% 
  summarise(value = mean(value), .groups = 'keep')

Plot ChIP-qPCR

Code
ggplot(data)+
  aes(x=Sample, y=value, fill = Genotype) +
  facet_grid(~Gene)+
  geom_col(data = avg, show.legend = F, width = 0.95 )+
  geom_point(size = 1, shape = 21, stroke = 0.2)+
  scale_fill_manual(values = genotype_palette ) +
  scale_x_discrete(labels = c('WT', '', 'CSex4', '', '∆ex4', '', 'KO')) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.02), add = 0)) +
  labs(y = "% of input") +
  theme_classic(base_size = 6, base_family = 'Arial') +
  theme(legend.position = c(0.92, 0.85),
        legend.background = element_blank(),
        legend.title = element_blank(),
        legend.key = element_blank(),
        legend.key.size = unit(1, 'mm'),
        legend.text = element_text(margin = margin(l = -1)),
        panel.grid.major.y = element_line(linewidth = 0.2),
        axis.text = element_text(colour = 'black'),
        axis.text.x = element_text(angle = 45, hjust = 1, margin = margin(t = -1, unit = 'mm')),
        axis.title = element_text(size = 5, colour = 'black'),
        axis.title.x = element_blank(),
        axis.title.y = element_text(margin = margin(r = 0)),
        axis.ticks.length = unit(x = 1, units = 'mm'),
        axis.ticks.x = element_blank(),
        axis.line = element_line(linewidth = 0.2),
        strip.background = element_blank(),
        panel.background = element_blank(),
        plot.background = element_blank()) -> p_ChIP_qCPR
p_ChIP_qCPR

Code
ggsave(filename = 'FigS5C_ChIP_qPCR_PRC2_targets.pdf', 
       path = pdf_dir_fig5, plot = p_ChIP_qCPR,
       device = cairo_pdf, units = 'mm', width = 60, height = 50)

4.3 Individual replicates metagene profiles without spike-in normalisation

4.4 Spie chart distribution H3K27me3 in ∆ex4 ESCs.

Spie chart of the percentage distribution of H3K27me3 peaks in ∆ex4 ESCs. The radial increase corresponds to the fold-change relative to WT ESCs.

Code from Ivano’s script

Code
# WT vs genome
x <- c(3100,2677,278)
y <- c(4661,3143,1248)
percentX <-x/sum(x)
percentY <-y/sum(y)
percentY <- (percentX/percentY)/sum(percentX/percentY)
names(percentX) <- c("Promoter","Genic","Intergenic")
names(percentY) <- names(percentX)
spie(percentY, percentX, multi = c(0.5, 1, 1.5), lwd = 1,
     pie.labs = TRUE, grid = TRUE, grid.labs = TRUE, scale = TRUE, p1.circle = TRUE,
     bg = c(NA, NA, NA),
     col = c("thistle","palegreen","#728189") )

Save to pdf.

Code
cairo_pdf(file = file.path(pdf_dir_fig5, 'FigS5E_H3K27me3_peaks_location_SpieChart.pdf'), 
          width = 4, height = 4)
spie(percentY, percentX, multi = c(0.5, 1, 1.5), lwd = 1,
     pie.labs = TRUE, grid = TRUE, grid.labs = TRUE, scale = TRUE, p1.circle = TRUE,
     bg = c(NA, NA, NA),
     col = c("thistle","palegreen","#728189") ) 
dev.off()
quartz_off_screen 
                2 

End of analysis.

5 Session Info

Code
sessioninfo::session_info()
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.1.2 (2021-11-01)
 os       macOS Catalina 10.15.7
 system   x86_64, darwin17.0
 ui       X11
 language (EN)
 collate  C
 ctype    UTF-8
 tz       Europe/Madrid
 date     2024-02-07
 pandoc   2.10.1 @ /usr/local/bin/ (via rmarkdown)

─ Packages ───────────────────────────────────────────────────────────────────
 package          * version   date (UTC) lib source
 abind              1.4-5     2016-07-21 [1] CRAN (R 4.1.0)
 AnnotationDbi      1.56.2    2021-11-09 [1] Bioconductor
 backports          1.4.1     2021-12-13 [1] CRAN (R 4.1.0)
 beeswarm           0.4.0     2021-06-01 [1] CRAN (R 4.1.0)
 Biobase            2.54.0    2021-10-26 [1] Bioconductor
 BiocFileCache      2.2.1     2022-01-23 [1] Bioconductor
 BiocGenerics       0.40.0    2021-10-26 [1] Bioconductor
 biomaRt            2.50.3    2022-02-06 [1] Bioconductor
 Biostrings         2.62.0    2021-10-26 [1] Bioconductor
 BioVenn          * 1.1.3     2021-06-19 [1] CRAN (R 4.1.0)
 bit                4.0.5     2022-11-15 [1] CRAN (R 4.1.2)
 bit64              4.0.5     2020-08-30 [1] CRAN (R 4.1.0)
 bitops             1.0-7     2021-04-24 [1] CRAN (R 4.1.0)
 blob               1.2.4     2023-03-17 [1] CRAN (R 4.1.2)
 broom              1.0.5     2023-06-09 [1] CRAN (R 4.1.2)
 bslib              0.5.1     2023-08-11 [1] CRAN (R 4.1.2)
 cachem             1.0.8     2023-05-01 [1] CRAN (R 4.1.2)
 Cairo              1.6-1     2023-08-18 [1] CRAN (R 4.1.2)
 car                3.1-2     2023-03-30 [1] CRAN (R 4.1.2)
 carData            3.0-5     2022-01-06 [1] CRAN (R 4.1.2)
 caroline         * 0.9.0     2022-11-12 [1] CRAN (R 4.1.2)
 cellranger         1.1.0     2016-07-27 [1] CRAN (R 4.1.0)
 cli                3.6.1     2023-03-23 [1] CRAN (R 4.1.2)
 colorspace         2.1-0     2023-01-23 [1] CRAN (R 4.1.2)
 crayon             1.5.2     2022-09-29 [1] CRAN (R 4.1.2)
 crosstalk          1.2.0     2021-11-04 [1] CRAN (R 4.1.0)
 curl               5.2.0     2023-12-08 [1] CRAN (R 4.1.2)
 DBI                1.1.3     2022-06-18 [1] CRAN (R 4.1.2)
 dbplyr             2.3.3     2023-07-07 [1] CRAN (R 4.1.2)
 digest             0.6.33    2023-07-07 [1] CRAN (R 4.1.2)
 dplyr            * 1.1.2     2023-04-20 [1] CRAN (R 4.1.2)
 DT               * 0.29      2023-08-29 [1] CRAN (R 4.1.2)
 ellipsis           0.3.2     2021-04-29 [1] CRAN (R 4.1.0)
 evaluate           0.21      2023-05-05 [1] CRAN (R 4.1.2)
 fansi              1.0.4     2023-01-22 [1] CRAN (R 4.1.2)
 farver             2.1.1     2022-07-06 [1] CRAN (R 4.1.2)
 fastmap            1.1.1     2023-02-24 [1] CRAN (R 4.1.2)
 filelock           1.0.2     2018-10-05 [1] CRAN (R 4.1.0)
 generics           0.1.3     2022-07-05 [1] CRAN (R 4.1.2)
 GenomeInfoDb       1.30.1    2022-01-30 [1] Bioconductor
 GenomeInfoDbData   1.2.7     2023-08-29 [1] Bioconductor
 ggbeeswarm         0.7.2     2023-04-29 [1] CRAN (R 4.1.2)
 ggforce          * 0.4.1     2022-10-04 [1] CRAN (R 4.1.2)
 ggh4x            * 0.2.5     2023-07-15 [1] CRAN (R 4.1.2)
 ggplot2          * 3.4.3     2023-08-14 [1] CRAN (R 4.1.2)
 ggrastr          * 1.0.2     2023-06-01 [1] CRAN (R 4.1.2)
 ggsignif         * 0.6.4     2022-10-13 [1] CRAN (R 4.1.2)
 glue               1.6.2     2022-02-24 [1] CRAN (R 4.1.2)
 gtable             0.3.4     2023-08-21 [1] CRAN (R 4.1.2)
 hms                1.1.3     2023-03-21 [1] CRAN (R 4.1.2)
 htmltools          0.5.6     2023-08-10 [1] CRAN (R 4.1.2)
 htmlwidgets        1.6.2     2023-03-17 [1] CRAN (R 4.1.2)
 httr               1.4.7     2023-08-15 [1] CRAN (R 4.1.2)
 IRanges            2.28.0    2021-10-26 [1] Bioconductor
 janitor          * 2.2.0     2023-02-02 [1] CRAN (R 4.1.2)
 jquerylib          0.1.4     2021-04-26 [1] CRAN (R 4.1.0)
 jsonlite           1.8.7     2023-06-29 [1] CRAN (R 4.1.2)
 KEGGREST           1.34.0    2021-10-26 [1] Bioconductor
 knitr              1.43      2023-05-25 [1] CRAN (R 4.1.2)
 labeling           0.4.2     2020-10-20 [1] CRAN (R 4.1.0)
 lifecycle          1.0.3     2022-10-07 [1] CRAN (R 4.1.2)
 lubridate          1.9.2     2023-02-10 [1] CRAN (R 4.1.2)
 magrittr           2.0.3     2022-03-30 [1] CRAN (R 4.1.2)
 MASS               7.3-60    2023-05-04 [1] CRAN (R 4.1.2)
 memoise            2.0.1     2021-11-26 [1] CRAN (R 4.1.0)
 munsell            0.5.0     2018-06-12 [1] CRAN (R 4.1.0)
 patchwork        * 1.1.3     2023-08-14 [1] CRAN (R 4.1.2)
 pillar             1.9.0     2023-03-22 [1] CRAN (R 4.1.2)
 pkgconfig          2.0.3     2019-09-22 [1] CRAN (R 4.1.0)
 plotrix            3.8-2     2021-09-08 [1] CRAN (R 4.1.0)
 png                0.1-8     2022-11-29 [1] CRAN (R 4.1.2)
 polyclip           1.10-4    2022-10-20 [1] CRAN (R 4.1.2)
 prettyunits        1.1.1     2020-01-24 [1] CRAN (R 4.1.0)
 progress           1.2.2     2019-05-16 [1] CRAN (R 4.1.0)
 purrr              1.0.2     2023-08-10 [1] CRAN (R 4.1.2)
 R6                 2.5.1     2021-08-19 [1] CRAN (R 4.1.0)
 rappdirs           0.3.3     2021-01-31 [1] CRAN (R 4.1.0)
 Rcpp               1.0.11    2023-07-06 [1] CRAN (R 4.1.2)
 RCurl              1.98-1.12 2023-03-27 [1] CRAN (R 4.1.2)
 readr            * 2.1.4     2023-02-10 [1] CRAN (R 4.1.2)
 readxl           * 1.4.3     2023-07-06 [1] CRAN (R 4.1.2)
 rlang              1.1.1     2023-04-28 [1] CRAN (R 4.1.2)
 rmarkdown          2.24      2023-08-14 [1] CRAN (R 4.1.2)
 RSQLite            2.3.1     2023-04-03 [1] CRAN (R 4.1.2)
 rstatix          * 0.7.2     2023-02-01 [1] CRAN (R 4.1.2)
 rstudioapi         0.15.0    2023-07-07 [1] CRAN (R 4.1.2)
 S4Vectors          0.32.4    2022-03-29 [1] Bioconductor
 sass               0.4.7     2023-07-15 [1] CRAN (R 4.1.2)
 scales             1.2.1     2022-08-20 [1] CRAN (R 4.1.2)
 sessioninfo        1.2.2     2021-12-06 [1] CRAN (R 4.1.0)
 snakecase          0.11.1    2023-08-27 [1] CRAN (R 4.1.2)
 stringi            1.7.12    2023-01-11 [1] CRAN (R 4.1.2)
 stringr          * 1.5.0     2022-12-02 [1] CRAN (R 4.1.2)
 svglite            2.1.1     2023-01-10 [1] CRAN (R 4.1.2)
 systemfonts        1.0.4     2022-02-11 [1] CRAN (R 4.1.2)
 tibble           * 3.2.1     2023-03-20 [1] CRAN (R 4.1.2)
 tidyr            * 1.3.0     2023-01-24 [1] CRAN (R 4.1.2)
 tidyselect         1.2.0     2022-10-10 [1] CRAN (R 4.1.2)
 timechange         0.2.0     2023-01-11 [1] CRAN (R 4.1.2)
 tweenr             2.0.2     2022-09-06 [1] CRAN (R 4.1.2)
 tzdb               0.4.0     2023-05-12 [1] CRAN (R 4.1.2)
 utf8               1.2.3     2023-01-31 [1] CRAN (R 4.1.2)
 vctrs              0.6.3     2023-06-14 [1] CRAN (R 4.1.2)
 vipor              0.4.5     2017-03-22 [1] CRAN (R 4.1.0)
 vroom              1.6.3     2023-04-28 [1] CRAN (R 4.1.2)
 withr              2.5.0     2022-03-03 [1] CRAN (R 4.1.2)
 xfun               0.40      2023-08-09 [1] CRAN (R 4.1.2)
 XML                3.99-0.14 2023-03-19 [1] CRAN (R 4.1.2)
 xml2               1.3.5     2023-07-06 [1] CRAN (R 4.1.2)
 XVector            0.34.0    2021-10-26 [1] Bioconductor
 yaml               2.3.7     2023-01-23 [1] CRAN (R 4.1.2)
 zlibbioc           1.40.0    2021-10-26 [1] Bioconductor

 [1] /Library/Frameworks/R.framework/Versions/4.1/Resources/library

──────────────────────────────────────────────────────────────────────────────